diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreator.java index 0fb9efa62dbb7..530c76eb52e73 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreator.java @@ -28,6 +28,8 @@ */ public final class GrokPatternCreator { + private static final boolean ECS_COMPATIBILITY = true; + private static final Logger logger = LogManager.getLogger(GrokPatternCreator.class); private static final String PREFACE = "preface"; @@ -39,7 +41,7 @@ public final class GrokPatternCreator { * such that more generic patterns come after more specific patterns. */ private static final List ORDERED_CANDIDATE_GROK_PATTERNS = Arrays.asList( - new GrokPatternCandidate("TOMCAT_DATESTAMP", "timestamp"), + new GrokPatternCandidate("TOMCATLEGACY_DATESTAMP", "timestamp"), new GrokPatternCandidate("TIMESTAMP_ISO8601", "timestamp"), new GrokPatternCandidate("DATESTAMP_RFC822", "timestamp"), new GrokPatternCandidate("DATESTAMP_RFC2822", "timestamp"), @@ -51,8 +53,8 @@ public final class GrokPatternCreator { new GrokPatternCandidate("CISCOTIMESTAMP", "timestamp"), new GrokPatternCandidate("DATE", "date"), new GrokPatternCandidate("TIME", "time"), - new GrokPatternCandidate("LOGLEVEL", "loglevel"), - new GrokPatternCandidate("URI", "uri"), + new GrokPatternCandidate("LOGLEVEL", "log.level"), + new GrokPatternCandidate("URI", "url.original"), new GrokPatternCandidate("UUID", "uuid"), new GrokPatternCandidate("MAC", "macaddress"), // Can't use \b as the breaks, because slashes are not "word" characters @@ -284,7 +286,7 @@ static class GrokPatternCandidate { this.grokPatternName = grokPatternName; this.fieldName = fieldName; this.grok = new Grok( - Grok.getBuiltinPatterns(false), + Grok.getBuiltinPatterns(ECS_COMPATIBILITY), "%{DATA:" + PREFACE + "}" + preBreak + "%{" + grokPatternName + ":this}" + postBreak + "%{GREEDYDATA:" + EPILOGUE + "}", logger::warn ); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreatorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreatorTests.java index 1179392f22229..52161177d7abb 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreatorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreatorTests.java @@ -28,7 +28,7 @@ public void testBuildFieldName() { assertEquals("field3", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field")); assertEquals("timestamp", GrokPatternCreator.buildFieldName(fieldNameCountStore, "timestamp")); assertEquals("field4", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field")); - assertEquals("uri", GrokPatternCreator.buildFieldName(fieldNameCountStore, "uri")); + assertEquals("url.original", GrokPatternCreator.buildFieldName(fieldNameCountStore, "url.original")); assertEquals("timestamp2", GrokPatternCreator.buildFieldName(fieldNameCountStore, "timestamp")); assertEquals("field5", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field")); } @@ -85,7 +85,7 @@ public void testAppendBestGrokMatchForStringsGivenTimestampsAndLogLevels() { mustMatchStrings ); - assertEquals(".+?%{TIMESTAMP_ISO8601:timestamp}.+?%{LOGLEVEL:loglevel}.+?", overallGrokPatternBuilder.toString()); + assertEquals(".+?%{TIMESTAMP_ISO8601:timestamp}.+?%{LOGLEVEL:log.level}.+?", overallGrokPatternBuilder.toString()); } public void testAppendBestGrokMatchForStringsGivenTomcatDatestamps() { @@ -110,7 +110,7 @@ public void testAppendBestGrokMatchForStringsGivenTomcatDatestamps() { mustMatchStrings ); - assertEquals(".*?%{TOMCAT_DATESTAMP:timestamp}.+?%{LOGLEVEL:loglevel}.+?", overallGrokPatternBuilder.toString()); + assertEquals(".*?%{TOMCATLEGACY_DATESTAMP:timestamp}.+?%{LOGLEVEL:log.level}.+?", overallGrokPatternBuilder.toString()); } public void testAppendBestGrokMatchForStringsGivenTrappyFloatCandidates() { @@ -252,7 +252,7 @@ public void testAppendBestGrokMatchForStringsGivenUris() { mustMatchStrings ); - assertEquals(".*?%{URI:uri}.*?", overallGrokPatternBuilder.toString()); + assertEquals(".*?%{URI:url.original}.*?", overallGrokPatternBuilder.toString()); } public void testAppendBestGrokMatchForStringsGivenPaths() { @@ -314,6 +314,29 @@ public void testFindBestGrokMatchFromExamplesGivenCatalinaLogs() { ); } + public void testFindBestGrokMatchFromExamplesGivenCatalina8Logs() { + + String regex = ".*?WARNING.+?org\\.apache\\.tomcat\\.util\\.http\\.Parameters.+?processParameters.+?Parameters.+?" + + "Invalid.+?chunk.+?ignored.*"; + // The embedded newline ensures the regular expressions we're using are compiled with Pattern.DOTALL + Collection examples = Arrays.asList( + "29-Aug-2009 12:03:33 WARNING [main] org.apache.tomcat.util.http.Parameters processParameters: Parameters: \n" + + "Invalid chunk ignored.", + "29-Aug-2009 12:03:33 WARNING [main] org.apache.tomcat.util.http.Parameters processParameters: Parameters: \n" + + "Invalid chunk ignored.", + "29-Aug-2009 12:03:33 WARNING [main] org.apache.tomcat.util.http.Parameters processParameters: Parameters: \n" + + "Invalid chunk ignored.", + "29-Aug-2009 12:03:33 WARNING [main] org.apache.tomcat.util.http.Parameters processParameters: Parameters: \n" + + "Invalid chunk ignored." + ); + + assertEquals( + ".*?%{CATALINA_DATESTAMP:timestamp}.+?WARNING.+?org\\.apache\\.tomcat\\.util\\.http\\.Parameters.+?processParameters.+?" + + "Parameters.+?Invalid.+?chunk.+?ignored.*", + GrokPatternCreator.findBestGrokMatchFromExamples("foo", regex, examples) + ); + } + public void testFindBestGrokMatchFromExamplesGivenMultiTimestampLogs() { String regex = ".*?Authpriv.+?Info.+?sshd.+?subsystem.+?request.+?for.+?sftp.*";