From ff9761c70d95ed7e25a5adcd53f28ca468181108 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Wed, 12 Jun 2024 12:44:01 -0700 Subject: [PATCH 1/7] grok validate patterns recusrion to iterative Signed-off-by: Sandesh Kumar --- CHANGELOG.md | 1 + .../main/java/org/opensearch/grok/Grok.java | 119 +++++++++++++----- 2 files changed, 88 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6138e6dd969b1..eddd7bea304f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Switch to iterative version of WKT format parser ([#14086](https://github.com/opensearch-project/OpenSearch/pull/14086)) - Fix the computed max shards of cluster to avoid int overflow ([#14155](https://github.com/opensearch-project/OpenSearch/pull/14155)) - Fixed rest-high-level client searchTemplate & mtermVectors endpoints to have a leading slash ([#14465](https://github.com/opensearch-project/OpenSearch/pull/14465)) +- Refactoring Grok.validatePatternBank by using an iterative approach ([#14206](https://github.com/opensearch-project/OpenSearch/pull/14206)) ### Security diff --git a/libs/grok/src/main/java/org/opensearch/grok/Grok.java b/libs/grok/src/main/java/org/opensearch/grok/Grok.java index 7aa3347ba4f4b..1b1d254f4bcfe 100644 --- a/libs/grok/src/main/java/org/opensearch/grok/Grok.java +++ b/libs/grok/src/main/java/org/opensearch/grok/Grok.java @@ -37,14 +37,18 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; +import java.util.Deque; +import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Stack; +import java.util.Set; import java.util.function.Consumer; import org.jcodings.specific.UTF8Encoding; @@ -128,7 +132,7 @@ private Grok( expressionBytes.length, Option.DEFAULT, UTF8Encoding.INSTANCE, - message -> logCallBack.accept(message) + logCallBack::accept ); List captureConfig = new ArrayList<>(); @@ -144,7 +148,7 @@ private Grok( */ private void validatePatternBank() { for (String patternName : patternBank.keySet()) { - validatePatternBank(patternName, new Stack<>()); + validatePatternBank(patternName); } } @@ -156,33 +160,81 @@ private void validatePatternBank() { * a reference to another named pattern. This method will navigate to all these named patterns and * check for a circular reference. */ - private void validatePatternBank(String patternName, Stack path) { - String pattern = patternBank.get(patternName); - boolean isSelfReference = pattern.contains("%{" + patternName + "}") || pattern.contains("%{" + patternName + ":"); - if (isSelfReference) { - throwExceptionForCircularReference(patternName, pattern); - } else if (path.contains(patternName)) { - // current pattern name is already in the path, fetch its predecessor - String prevPatternName = path.pop(); - String prevPattern = patternBank.get(prevPatternName); - throwExceptionForCircularReference(prevPatternName, prevPattern, patternName, path); - } - path.push(patternName); - for (int i = pattern.indexOf("%{"); i != -1; i = pattern.indexOf("%{", i + 1)) { - int begin = i + 2; - int syntaxEndIndex = pattern.indexOf('}', begin); - if (syntaxEndIndex == -1) { - throw new IllegalArgumentException("Malformed pattern [" + patternName + "][" + pattern + "]"); + private void validatePatternBank(String initialPatternName) { + Deque queue = new ArrayDeque<>(); + Set visitedPatterns = new HashSet<>(); + Map> pathMap = new HashMap<>(); + + Deque initialPath = new ArrayDeque<>(); + initialPath.push(initialPatternName); + pathMap.put(initialPatternName, initialPath); + queue.push(new Frame(initialPatternName, initialPath, 0)); + + while (!queue.isEmpty()) { + Frame frame = queue.peek(); + String patternName = frame.patternName; + Deque path = frame.path; + int startIndex = frame.startIndex; + String pattern = patternBank.get(patternName); + + if (visitedPatterns.contains(patternName)) { + queue.pop(); + continue; } - int semanticNameIndex = pattern.indexOf(':', begin); - int end = syntaxEndIndex; - if (semanticNameIndex != -1) { - end = Math.min(syntaxEndIndex, semanticNameIndex); + + visitedPatterns.add(patternName); + boolean foundDependency = false; + + for (int i = startIndex; i < pattern.length(); i++) { + if (pattern.startsWith("%{", i)) { + int begin = i + 2; + int syntaxEndIndex = pattern.indexOf('}', begin); + if (syntaxEndIndex == -1) { + throw new IllegalArgumentException("Malformed pattern [" + patternName + "][" + pattern + "]"); + } + + int semanticNameIndex = pattern.indexOf(':', begin); + int end = semanticNameIndex == -1 ? syntaxEndIndex : Math.min(syntaxEndIndex, semanticNameIndex); + + String dependsOnPattern = pattern.substring(begin, end); + + if (dependsOnPattern.equals(patternName)) { + throwExceptionForCircularReference(patternName, pattern); + } + + if (pathMap.containsKey(dependsOnPattern)) { + path.removeFirst(); + throwExceptionForCircularReference(patternName, pattern, dependsOnPattern, path); + } + + Deque newPath = new ArrayDeque<>(path); + newPath.push(dependsOnPattern); + pathMap.put(dependsOnPattern, newPath); + + queue.push(new Frame(dependsOnPattern, newPath, 0)); + frame.startIndex = i + 1; + foundDependency = true; + break; + } } - String dependsOnPattern = pattern.substring(begin, end); - validatePatternBank(dependsOnPattern, path); + + if (!foundDependency) { + pathMap.remove(patternName); + queue.pop(); + } + } + } + + private static class Frame { + String patternName; + Deque path; + int startIndex; + + Frame(String patternName, Deque path, int startIndex) { + this.patternName = patternName; + this.path = path; + this.startIndex = startIndex; } - path.pop(); } private static void throwExceptionForCircularReference(String patternName, String pattern) { @@ -193,7 +245,7 @@ private static void throwExceptionForCircularReference( String patternName, String pattern, String originPatterName, - Stack path + Deque path ) { StringBuilder message = new StringBuilder("circular reference in pattern ["); message.append(patternName).append("][").append(pattern).append("]"); @@ -201,7 +253,12 @@ private static void throwExceptionForCircularReference( message.append(" back to pattern [").append(originPatterName).append("]"); } if (path != null && path.size() > 1) { - message.append(" via patterns [").append(String.join("=>", path)).append("]"); + Iterator pathIter = path.descendingIterator(); + message.append(" via patterns [").append(pathIter.next()); + while (pathIter.hasNext()) { + message.append("=>").append(pathIter.next()); + } + message.append("]"); } throw new IllegalArgumentException(message.toString()); } @@ -217,9 +274,7 @@ private String groupMatch(String name, Region region, String pattern) { int begin = region.getBeg(number); int end = region.getEnd(number); return new String(pattern.getBytes(StandardCharsets.UTF_8), begin, end - begin, StandardCharsets.UTF_8); - } catch (StringIndexOutOfBoundsException e) { - return null; - } catch (ValueException e) { + } catch (StringIndexOutOfBoundsException | ValueException e) { return null; } } From 8510dc1486e33b5b8d100ca2cbd4093cae24e840 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Mon, 17 Jun 2024 19:28:14 +0530 Subject: [PATCH 2/7] Add max depth in resolving a pattern to avoid OOM Signed-off-by: Sandesh Kumar --- libs/grok/src/main/java/org/opensearch/grok/Grok.java | 5 +++++ .../src/test/java/org/opensearch/grok/GrokTests.java | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/libs/grok/src/main/java/org/opensearch/grok/Grok.java b/libs/grok/src/main/java/org/opensearch/grok/Grok.java index 1b1d254f4bcfe..ab4f555310b7c 100644 --- a/libs/grok/src/main/java/org/opensearch/grok/Grok.java +++ b/libs/grok/src/main/java/org/opensearch/grok/Grok.java @@ -90,6 +90,7 @@ public final class Grok { UTF8Encoding.INSTANCE, Syntax.DEFAULT ); + private static final int MAX_PATTERN_DEPTH_SIZE = 1_000; private static final int MAX_TO_REGEX_ITERATIONS = 100_000; // sanity limit @@ -222,6 +223,10 @@ private void validatePatternBank(String initialPatternName) { pathMap.remove(patternName); queue.pop(); } + + if (queue.size() > MAX_PATTERN_DEPTH_SIZE) { + throw new IllegalArgumentException("Pattern references exceeded maximum depth of " + MAX_PATTERN_DEPTH_SIZE); + } } } diff --git a/libs/grok/src/test/java/org/opensearch/grok/GrokTests.java b/libs/grok/src/test/java/org/opensearch/grok/GrokTests.java index a37689e051c67..78a3ea9935915 100644 --- a/libs/grok/src/test/java/org/opensearch/grok/GrokTests.java +++ b/libs/grok/src/test/java/org/opensearch/grok/GrokTests.java @@ -377,6 +377,16 @@ public void testCircularReference() { "circular reference in pattern [NAME5][!!!%{NAME1}!!!] back to pattern [NAME1] " + "via patterns [NAME1=>NAME2=>NAME3=>NAME4]", e.getMessage() ); + + e = expectThrows(IllegalArgumentException.class, () -> { + Map bank = new TreeMap<>(); + for (int i = 1; i <= 1001; i++) { + bank.put("NAME" + i, "!!!%{NAME" + (i + 1) + "}!!!"); + } + String pattern = "%{NAME1}"; + new Grok(bank, pattern, false, logger::warn); + }); + assertEquals("Pattern references exceeded maximum depth of 1000", e.getMessage()); } public void testMalformedPattern() { From ea6451c39c5dfe6817b8856873fbd3ab9ee6ecfb Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Tue, 18 Jun 2024 13:49:34 +0530 Subject: [PATCH 3/7] change path from deque to arraylist Signed-off-by: Sandesh Kumar --- .../main/java/org/opensearch/grok/Grok.java | 33 +++++++------------ 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/libs/grok/src/main/java/org/opensearch/grok/Grok.java b/libs/grok/src/main/java/org/opensearch/grok/Grok.java index ab4f555310b7c..c56aae4145d61 100644 --- a/libs/grok/src/main/java/org/opensearch/grok/Grok.java +++ b/libs/grok/src/main/java/org/opensearch/grok/Grok.java @@ -164,17 +164,17 @@ private void validatePatternBank() { private void validatePatternBank(String initialPatternName) { Deque queue = new ArrayDeque<>(); Set visitedPatterns = new HashSet<>(); - Map> pathMap = new HashMap<>(); + Map> pathMap = new HashMap<>(); - Deque initialPath = new ArrayDeque<>(); - initialPath.push(initialPatternName); + List initialPath = new ArrayList<>(); + initialPath.add(initialPatternName); pathMap.put(initialPatternName, initialPath); queue.push(new Frame(initialPatternName, initialPath, 0)); while (!queue.isEmpty()) { Frame frame = queue.peek(); String patternName = frame.patternName; - Deque path = frame.path; + List path = frame.path; int startIndex = frame.startIndex; String pattern = patternBank.get(patternName); @@ -204,12 +204,11 @@ private void validatePatternBank(String initialPatternName) { } if (pathMap.containsKey(dependsOnPattern)) { - path.removeFirst(); - throwExceptionForCircularReference(patternName, pattern, dependsOnPattern, path); + throwExceptionForCircularReference(patternName, pattern, dependsOnPattern, path.subList(0, path.size() - 1)); } - Deque newPath = new ArrayDeque<>(path); - newPath.push(dependsOnPattern); + List newPath = new ArrayList<>(path); + newPath.add(dependsOnPattern); pathMap.put(dependsOnPattern, newPath); queue.push(new Frame(dependsOnPattern, newPath, 0)); @@ -232,10 +231,10 @@ private void validatePatternBank(String initialPatternName) { private static class Frame { String patternName; - Deque path; + List path; int startIndex; - Frame(String patternName, Deque path, int startIndex) { + Frame(String patternName, List path, int startIndex) { this.patternName = patternName; this.path = path; this.startIndex = startIndex; @@ -246,24 +245,14 @@ private static void throwExceptionForCircularReference(String patternName, Strin throwExceptionForCircularReference(patternName, pattern, null, null); } - private static void throwExceptionForCircularReference( - String patternName, - String pattern, - String originPatterName, - Deque path - ) { + private static void throwExceptionForCircularReference(String patternName, String pattern, String originPatterName, List path) { StringBuilder message = new StringBuilder("circular reference in pattern ["); message.append(patternName).append("][").append(pattern).append("]"); if (originPatterName != null) { message.append(" back to pattern [").append(originPatterName).append("]"); } if (path != null && path.size() > 1) { - Iterator pathIter = path.descendingIterator(); - message.append(" via patterns [").append(pathIter.next()); - while (pathIter.hasNext()) { - message.append("=>").append(pathIter.next()); - } - message.append("]"); + message.append(" via patterns [").append(String.join("=>", path)).append("]"); } throw new IllegalArgumentException(message.toString()); } From 95eaf1971da76e80cb2af1aafed73e92253f2eb8 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Tue, 18 Jun 2024 16:08:26 +0530 Subject: [PATCH 4/7] rename queue to stack Signed-off-by: Sandesh Kumar --- .../src/main/java/org/opensearch/grok/Grok.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/libs/grok/src/main/java/org/opensearch/grok/Grok.java b/libs/grok/src/main/java/org/opensearch/grok/Grok.java index c56aae4145d61..286e7e2665eb3 100644 --- a/libs/grok/src/main/java/org/opensearch/grok/Grok.java +++ b/libs/grok/src/main/java/org/opensearch/grok/Grok.java @@ -162,24 +162,24 @@ private void validatePatternBank() { * check for a circular reference. */ private void validatePatternBank(String initialPatternName) { - Deque queue = new ArrayDeque<>(); + Deque stack = new ArrayDeque<>(); Set visitedPatterns = new HashSet<>(); Map> pathMap = new HashMap<>(); List initialPath = new ArrayList<>(); initialPath.add(initialPatternName); pathMap.put(initialPatternName, initialPath); - queue.push(new Frame(initialPatternName, initialPath, 0)); + stack.push(new Frame(initialPatternName, initialPath, 0)); - while (!queue.isEmpty()) { - Frame frame = queue.peek(); + while (!stack.isEmpty()) { + Frame frame = stack.peek(); String patternName = frame.patternName; List path = frame.path; int startIndex = frame.startIndex; String pattern = patternBank.get(patternName); if (visitedPatterns.contains(patternName)) { - queue.pop(); + stack.pop(); continue; } @@ -211,7 +211,7 @@ private void validatePatternBank(String initialPatternName) { newPath.add(dependsOnPattern); pathMap.put(dependsOnPattern, newPath); - queue.push(new Frame(dependsOnPattern, newPath, 0)); + stack.push(new Frame(dependsOnPattern, newPath, 0)); frame.startIndex = i + 1; foundDependency = true; break; @@ -220,10 +220,10 @@ private void validatePatternBank(String initialPatternName) { if (!foundDependency) { pathMap.remove(patternName); - queue.pop(); + stack.pop(); } - if (queue.size() > MAX_PATTERN_DEPTH_SIZE) { + if (stack.size() > MAX_PATTERN_DEPTH_SIZE) { throw new IllegalArgumentException("Pattern references exceeded maximum depth of " + MAX_PATTERN_DEPTH_SIZE); } } From 93ae99faba3540d169a6741b215c4e97c6473184 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Fri, 21 Jun 2024 09:00:51 +0530 Subject: [PATCH 5/7] Change max depth to 500 Signed-off-by: Sandesh Kumar --- libs/grok/src/main/java/org/opensearch/grok/Grok.java | 2 +- libs/grok/src/test/java/org/opensearch/grok/GrokTests.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/grok/src/main/java/org/opensearch/grok/Grok.java b/libs/grok/src/main/java/org/opensearch/grok/Grok.java index 286e7e2665eb3..fc2325a367afb 100644 --- a/libs/grok/src/main/java/org/opensearch/grok/Grok.java +++ b/libs/grok/src/main/java/org/opensearch/grok/Grok.java @@ -90,7 +90,7 @@ public final class Grok { UTF8Encoding.INSTANCE, Syntax.DEFAULT ); - private static final int MAX_PATTERN_DEPTH_SIZE = 1_000; + private static final int MAX_PATTERN_DEPTH_SIZE = 500; private static final int MAX_TO_REGEX_ITERATIONS = 100_000; // sanity limit diff --git a/libs/grok/src/test/java/org/opensearch/grok/GrokTests.java b/libs/grok/src/test/java/org/opensearch/grok/GrokTests.java index 78a3ea9935915..8476d541aa46e 100644 --- a/libs/grok/src/test/java/org/opensearch/grok/GrokTests.java +++ b/libs/grok/src/test/java/org/opensearch/grok/GrokTests.java @@ -380,13 +380,13 @@ public void testCircularReference() { e = expectThrows(IllegalArgumentException.class, () -> { Map bank = new TreeMap<>(); - for (int i = 1; i <= 1001; i++) { + for (int i = 1; i <= 501; i++) { bank.put("NAME" + i, "!!!%{NAME" + (i + 1) + "}!!!"); } String pattern = "%{NAME1}"; new Grok(bank, pattern, false, logger::warn); }); - assertEquals("Pattern references exceeded maximum depth of 1000", e.getMessage()); + assertEquals("Pattern references exceeded maximum depth of 500", e.getMessage()); } public void testMalformedPattern() { From f507270ac6e91a3c32fda3472e3ff1bbc9db01de Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Tue, 25 Jun 2024 23:31:43 +0530 Subject: [PATCH 6/7] typo originPatternName fix Signed-off-by: Sandesh Kumar --- libs/grok/src/main/java/org/opensearch/grok/Grok.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/grok/src/main/java/org/opensearch/grok/Grok.java b/libs/grok/src/main/java/org/opensearch/grok/Grok.java index fc2325a367afb..96e79e0189300 100644 --- a/libs/grok/src/main/java/org/opensearch/grok/Grok.java +++ b/libs/grok/src/main/java/org/opensearch/grok/Grok.java @@ -245,11 +245,11 @@ private static void throwExceptionForCircularReference(String patternName, Strin throwExceptionForCircularReference(patternName, pattern, null, null); } - private static void throwExceptionForCircularReference(String patternName, String pattern, String originPatterName, List path) { + private static void throwExceptionForCircularReference(String patternName, String pattern, String originPatternName, List path) { StringBuilder message = new StringBuilder("circular reference in pattern ["); message.append(patternName).append("][").append(pattern).append("]"); - if (originPatterName != null) { - message.append(" back to pattern [").append(originPatterName).append("]"); + if (originPatternName != null) { + message.append(" back to pattern [").append(originPatternName).append("]"); } if (path != null && path.size() > 1) { message.append(" via patterns [").append(String.join("=>", path)).append("]"); From 520235dc2bbb1a18e0b1e55ff883da8bba52ac15 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Wed, 26 Jun 2024 11:00:16 +0530 Subject: [PATCH 7/7] spotless Signed-off-by: Sandesh Kumar --- libs/grok/src/main/java/org/opensearch/grok/Grok.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libs/grok/src/main/java/org/opensearch/grok/Grok.java b/libs/grok/src/main/java/org/opensearch/grok/Grok.java index 96e79e0189300..aa5b1a936b99d 100644 --- a/libs/grok/src/main/java/org/opensearch/grok/Grok.java +++ b/libs/grok/src/main/java/org/opensearch/grok/Grok.java @@ -245,7 +245,12 @@ private static void throwExceptionForCircularReference(String patternName, Strin throwExceptionForCircularReference(patternName, pattern, null, null); } - private static void throwExceptionForCircularReference(String patternName, String pattern, String originPatternName, List path) { + private static void throwExceptionForCircularReference( + String patternName, + String pattern, + String originPatternName, + List path + ) { StringBuilder message = new StringBuilder("circular reference in pattern ["); message.append(patternName).append("][").append(pattern).append("]"); if (originPatternName != null) {