Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Grok validate pattern to iterative approach #14206

Merged
merged 12 commits into from
Jul 9, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix file cache initialization ([#14004](https://github.com/opensearch-project/OpenSearch/pull/14004))
- Handle NPE in GetResult if "found" field is missing ([#14552](https://github.com/opensearch-project/OpenSearch/pull/14552))
- Refactoring FilterPath.parse by using an iterative approach ([#14200](https://github.com/opensearch-project/OpenSearch/pull/14200))
- Refactoring Grok.validatePatternBank by using an iterative approach ([#14206](https://github.com/opensearch-project/OpenSearch/pull/14206))

### Security

Expand Down
122 changes: 88 additions & 34 deletions libs/grok/src/main/java/org/opensearch/grok/Grok.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,18 @@
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Stack;
import java.util.Set;
import java.util.function.Consumer;

import org.jcodings.specific.UTF8Encoding;
Expand Down Expand Up @@ -86,6 +90,7 @@
UTF8Encoding.INSTANCE,
Syntax.DEFAULT
);
private static final int MAX_PATTERN_DEPTH_SIZE = 500;

private static final int MAX_TO_REGEX_ITERATIONS = 100_000; // sanity limit

Expand Down Expand Up @@ -128,7 +133,7 @@
expressionBytes.length,
Option.DEFAULT,
UTF8Encoding.INSTANCE,
message -> logCallBack.accept(message)
logCallBack::accept
);

List<GrokCaptureConfig> captureConfig = new ArrayList<>();
Expand All @@ -144,7 +149,7 @@
*/
private void validatePatternBank() {
for (String patternName : patternBank.keySet()) {
validatePatternBank(patternName, new Stack<>());
validatePatternBank(patternName);
}
}

Expand All @@ -156,33 +161,84 @@
* a reference to another named pattern. This method will navigate to all these named patterns and
* check for a circular reference.
*/
private void validatePatternBank(String patternName, Stack<String> path) {
String pattern = patternBank.get(patternName);
boolean isSelfReference = pattern.contains("%{" + patternName + "}") || pattern.contains("%{" + patternName + ":");
if (isSelfReference) {
throwExceptionForCircularReference(patternName, pattern);
} else if (path.contains(patternName)) {
// current pattern name is already in the path, fetch its predecessor
String prevPatternName = path.pop();
String prevPattern = patternBank.get(prevPatternName);
throwExceptionForCircularReference(prevPatternName, prevPattern, patternName, path);
}
path.push(patternName);
for (int i = pattern.indexOf("%{"); i != -1; i = pattern.indexOf("%{", i + 1)) {
int begin = i + 2;
int syntaxEndIndex = pattern.indexOf('}', begin);
if (syntaxEndIndex == -1) {
throw new IllegalArgumentException("Malformed pattern [" + patternName + "][" + pattern + "]");
private void validatePatternBank(String initialPatternName) {
Deque<Frame> stack = new ArrayDeque<>();
Set<String> visitedPatterns = new HashSet<>();
Map<String, List<String>> pathMap = new HashMap<>();

List<String> initialPath = new ArrayList<>();
initialPath.add(initialPatternName);
pathMap.put(initialPatternName, initialPath);
stack.push(new Frame(initialPatternName, initialPath, 0));

while (!stack.isEmpty()) {
Frame frame = stack.peek();
String patternName = frame.patternName;
List<String> path = frame.path;
int startIndex = frame.startIndex;
String pattern = patternBank.get(patternName);

if (visitedPatterns.contains(patternName)) {
stack.pop();
continue;
}

visitedPatterns.add(patternName);
boolean foundDependency = false;

for (int i = startIndex; i < pattern.length(); i++) {
if (pattern.startsWith("%{", i)) {
int begin = i + 2;
int syntaxEndIndex = pattern.indexOf('}', begin);
if (syntaxEndIndex == -1) {
throw new IllegalArgumentException("Malformed pattern [" + patternName + "][" + pattern + "]");
}

int semanticNameIndex = pattern.indexOf(':', begin);
int end = semanticNameIndex == -1 ? syntaxEndIndex : Math.min(syntaxEndIndex, semanticNameIndex);

String dependsOnPattern = pattern.substring(begin, end);

if (dependsOnPattern.equals(patternName)) {
throwExceptionForCircularReference(patternName, pattern);

Check warning on line 203 in libs/grok/src/main/java/org/opensearch/grok/Grok.java

View check run for this annotation

Codecov / codecov/patch

libs/grok/src/main/java/org/opensearch/grok/Grok.java#L203

Added line #L203 was not covered by tests
msfroh marked this conversation as resolved.
Show resolved Hide resolved
}

if (pathMap.containsKey(dependsOnPattern)) {
throwExceptionForCircularReference(patternName, pattern, dependsOnPattern, path.subList(0, path.size() - 1));

Check warning on line 207 in libs/grok/src/main/java/org/opensearch/grok/Grok.java

View check run for this annotation

Codecov / codecov/patch

libs/grok/src/main/java/org/opensearch/grok/Grok.java#L207

Added line #L207 was not covered by tests
}

List<String> newPath = new ArrayList<>(path);
newPath.add(dependsOnPattern);
pathMap.put(dependsOnPattern, newPath);

stack.push(new Frame(dependsOnPattern, newPath, 0));
frame.startIndex = i + 1;
foundDependency = true;
break;
}
}
int semanticNameIndex = pattern.indexOf(':', begin);
int end = syntaxEndIndex;
if (semanticNameIndex != -1) {
end = Math.min(syntaxEndIndex, semanticNameIndex);

if (!foundDependency) {
pathMap.remove(patternName);
stack.pop();
}

if (stack.size() > MAX_PATTERN_DEPTH_SIZE) {
throw new IllegalArgumentException("Pattern references exceeded maximum depth of " + MAX_PATTERN_DEPTH_SIZE);
}
String dependsOnPattern = pattern.substring(begin, end);
validatePatternBank(dependsOnPattern, path);
}
path.pop();
}

private static class Frame {
String patternName;
List<String> path;
int startIndex;

Frame(String patternName, List<String> path, int startIndex) {
this.patternName = patternName;
this.path = path;
this.startIndex = startIndex;
}
}

private static void throwExceptionForCircularReference(String patternName, String pattern) {
Expand All @@ -192,13 +248,13 @@
private static void throwExceptionForCircularReference(
String patternName,
String pattern,
String originPatterName,
Stack<String> path
String originPatternName,
List<String> path
) {
StringBuilder message = new StringBuilder("circular reference in pattern [");
message.append(patternName).append("][").append(pattern).append("]");
if (originPatterName != null) {
message.append(" back to pattern [").append(originPatterName).append("]");
if (originPatternName != null) {
message.append(" back to pattern [").append(originPatternName).append("]");
}
if (path != null && path.size() > 1) {
message.append(" via patterns [").append(String.join("=>", path)).append("]");
Expand All @@ -217,9 +273,7 @@
int begin = region.getBeg(number);
int end = region.getEnd(number);
return new String(pattern.getBytes(StandardCharsets.UTF_8), begin, end - begin, StandardCharsets.UTF_8);
} catch (StringIndexOutOfBoundsException e) {
return null;
} catch (ValueException e) {
} catch (StringIndexOutOfBoundsException | ValueException e) {
return null;
}
}
Expand Down
10 changes: 10 additions & 0 deletions libs/grok/src/test/java/org/opensearch/grok/GrokTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,16 @@ public void testCircularReference() {
"circular reference in pattern [NAME5][!!!%{NAME1}!!!] back to pattern [NAME1] " + "via patterns [NAME1=>NAME2=>NAME3=>NAME4]",
e.getMessage()
);

e = expectThrows(IllegalArgumentException.class, () -> {
Map<String, String> bank = new TreeMap<>();
for (int i = 1; i <= 501; i++) {
bank.put("NAME" + i, "!!!%{NAME" + (i + 1) + "}!!!");
}
String pattern = "%{NAME1}";
new Grok(bank, pattern, false, logger::warn);
});
assertEquals("Pattern references exceeded maximum depth of 500", e.getMessage());
}

public void testMalformedPattern() {
Expand Down
Loading