Skip to content

Commit

Permalink
Refactor Grok validate pattern to iterative approach (opensearch-proj…
Browse files Browse the repository at this point in the history
…ect#14206)

* grok validate patterns recusrion to iterative

Signed-off-by: Sandesh Kumar <[email protected]>

* Add max depth in resolving a pattern to avoid OOM

Signed-off-by: Sandesh Kumar <[email protected]>

* change path from deque to arraylist

Signed-off-by: Sandesh Kumar <[email protected]>

* rename queue to stack

Signed-off-by: Sandesh Kumar <[email protected]>

* Change max depth to 500

Signed-off-by: Sandesh Kumar <[email protected]>

* typo originPatternName fix

Signed-off-by: Sandesh Kumar <[email protected]>

* spotless

Signed-off-by: Sandesh Kumar <[email protected]>

---------

Signed-off-by: Sandesh Kumar <[email protected]>
  • Loading branch information
sandeshkr419 authored and harshavamsi committed Jul 12, 2024
1 parent a2c2114 commit b8aa05b
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 34 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix file cache initialization ([#14004](https://github.com/opensearch-project/OpenSearch/pull/14004))
- Handle NPE in GetResult if "found" field is missing ([#14552](https://github.com/opensearch-project/OpenSearch/pull/14552))
- Refactoring FilterPath.parse by using an iterative approach ([#14200](https://github.com/opensearch-project/OpenSearch/pull/14200))
- Refactoring Grok.validatePatternBank by using an iterative approach ([#14206](https://github.com/opensearch-project/OpenSearch/pull/14206))

### Security

Expand Down
122 changes: 88 additions & 34 deletions libs/grok/src/main/java/org/opensearch/grok/Grok.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,18 @@
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Stack;
import java.util.Set;
import java.util.function.Consumer;

import org.jcodings.specific.UTF8Encoding;
Expand Down Expand Up @@ -86,6 +90,7 @@ public final class Grok {
UTF8Encoding.INSTANCE,
Syntax.DEFAULT
);
private static final int MAX_PATTERN_DEPTH_SIZE = 500;

private static final int MAX_TO_REGEX_ITERATIONS = 100_000; // sanity limit

Expand Down Expand Up @@ -128,7 +133,7 @@ private Grok(
expressionBytes.length,
Option.DEFAULT,
UTF8Encoding.INSTANCE,
message -> logCallBack.accept(message)
logCallBack::accept
);

List<GrokCaptureConfig> captureConfig = new ArrayList<>();
Expand All @@ -144,7 +149,7 @@ private Grok(
*/
private void validatePatternBank() {
for (String patternName : patternBank.keySet()) {
validatePatternBank(patternName, new Stack<>());
validatePatternBank(patternName);
}
}

Expand All @@ -156,33 +161,84 @@ private void validatePatternBank() {
* a reference to another named pattern. This method will navigate to all these named patterns and
* check for a circular reference.
*/
private void validatePatternBank(String patternName, Stack<String> path) {
String pattern = patternBank.get(patternName);
boolean isSelfReference = pattern.contains("%{" + patternName + "}") || pattern.contains("%{" + patternName + ":");
if (isSelfReference) {
throwExceptionForCircularReference(patternName, pattern);
} else if (path.contains(patternName)) {
// current pattern name is already in the path, fetch its predecessor
String prevPatternName = path.pop();
String prevPattern = patternBank.get(prevPatternName);
throwExceptionForCircularReference(prevPatternName, prevPattern, patternName, path);
}
path.push(patternName);
for (int i = pattern.indexOf("%{"); i != -1; i = pattern.indexOf("%{", i + 1)) {
int begin = i + 2;
int syntaxEndIndex = pattern.indexOf('}', begin);
if (syntaxEndIndex == -1) {
throw new IllegalArgumentException("Malformed pattern [" + patternName + "][" + pattern + "]");
private void validatePatternBank(String initialPatternName) {
Deque<Frame> stack = new ArrayDeque<>();
Set<String> visitedPatterns = new HashSet<>();
Map<String, List<String>> pathMap = new HashMap<>();

List<String> initialPath = new ArrayList<>();
initialPath.add(initialPatternName);
pathMap.put(initialPatternName, initialPath);
stack.push(new Frame(initialPatternName, initialPath, 0));

while (!stack.isEmpty()) {
Frame frame = stack.peek();
String patternName = frame.patternName;
List<String> path = frame.path;
int startIndex = frame.startIndex;
String pattern = patternBank.get(patternName);

if (visitedPatterns.contains(patternName)) {
stack.pop();
continue;
}

visitedPatterns.add(patternName);
boolean foundDependency = false;

for (int i = startIndex; i < pattern.length(); i++) {
if (pattern.startsWith("%{", i)) {
int begin = i + 2;
int syntaxEndIndex = pattern.indexOf('}', begin);
if (syntaxEndIndex == -1) {
throw new IllegalArgumentException("Malformed pattern [" + patternName + "][" + pattern + "]");
}

int semanticNameIndex = pattern.indexOf(':', begin);
int end = semanticNameIndex == -1 ? syntaxEndIndex : Math.min(syntaxEndIndex, semanticNameIndex);

String dependsOnPattern = pattern.substring(begin, end);

if (dependsOnPattern.equals(patternName)) {
throwExceptionForCircularReference(patternName, pattern);
}

if (pathMap.containsKey(dependsOnPattern)) {
throwExceptionForCircularReference(patternName, pattern, dependsOnPattern, path.subList(0, path.size() - 1));
}

List<String> newPath = new ArrayList<>(path);
newPath.add(dependsOnPattern);
pathMap.put(dependsOnPattern, newPath);

stack.push(new Frame(dependsOnPattern, newPath, 0));
frame.startIndex = i + 1;
foundDependency = true;
break;
}
}
int semanticNameIndex = pattern.indexOf(':', begin);
int end = syntaxEndIndex;
if (semanticNameIndex != -1) {
end = Math.min(syntaxEndIndex, semanticNameIndex);

if (!foundDependency) {
pathMap.remove(patternName);
stack.pop();
}

if (stack.size() > MAX_PATTERN_DEPTH_SIZE) {
throw new IllegalArgumentException("Pattern references exceeded maximum depth of " + MAX_PATTERN_DEPTH_SIZE);
}
String dependsOnPattern = pattern.substring(begin, end);
validatePatternBank(dependsOnPattern, path);
}
path.pop();
}

private static class Frame {
String patternName;
List<String> path;
int startIndex;

Frame(String patternName, List<String> path, int startIndex) {
this.patternName = patternName;
this.path = path;
this.startIndex = startIndex;
}
}

private static void throwExceptionForCircularReference(String patternName, String pattern) {
Expand All @@ -192,13 +248,13 @@ private static void throwExceptionForCircularReference(String patternName, Strin
private static void throwExceptionForCircularReference(
String patternName,
String pattern,
String originPatterName,
Stack<String> path
String originPatternName,
List<String> path
) {
StringBuilder message = new StringBuilder("circular reference in pattern [");
message.append(patternName).append("][").append(pattern).append("]");
if (originPatterName != null) {
message.append(" back to pattern [").append(originPatterName).append("]");
if (originPatternName != null) {
message.append(" back to pattern [").append(originPatternName).append("]");
}
if (path != null && path.size() > 1) {
message.append(" via patterns [").append(String.join("=>", path)).append("]");
Expand All @@ -217,9 +273,7 @@ private String groupMatch(String name, Region region, String pattern) {
int begin = region.getBeg(number);
int end = region.getEnd(number);
return new String(pattern.getBytes(StandardCharsets.UTF_8), begin, end - begin, StandardCharsets.UTF_8);
} catch (StringIndexOutOfBoundsException e) {
return null;
} catch (ValueException e) {
} catch (StringIndexOutOfBoundsException | ValueException e) {
return null;
}
}
Expand Down
10 changes: 10 additions & 0 deletions libs/grok/src/test/java/org/opensearch/grok/GrokTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,16 @@ public void testCircularReference() {
"circular reference in pattern [NAME5][!!!%{NAME1}!!!] back to pattern [NAME1] " + "via patterns [NAME1=>NAME2=>NAME3=>NAME4]",
e.getMessage()
);

e = expectThrows(IllegalArgumentException.class, () -> {
Map<String, String> bank = new TreeMap<>();
for (int i = 1; i <= 501; i++) {
bank.put("NAME" + i, "!!!%{NAME" + (i + 1) + "}!!!");
}
String pattern = "%{NAME1}";
new Grok(bank, pattern, false, logger::warn);
});
assertEquals("Pattern references exceeded maximum depth of 500", e.getMessage());
}

public void testMalformedPattern() {
Expand Down

0 comments on commit b8aa05b

Please sign in to comment.