|
11 | 11 | import java.util.ArrayList; |
12 | 12 | import java.util.Arrays; |
13 | 13 | import java.util.Collection; |
| 14 | +import java.util.Collections; |
14 | 15 | import java.util.HashMap; |
15 | 16 | import java.util.Map; |
16 | 17 |
|
@@ -344,4 +345,42 @@ public void testFindBestGrokMatchFromExamplesGivenMatchAllRegex() { |
344 | 345 | "[tweets_by_location] Killing job"); |
345 | 346 | assertThat(GrokPatternCreator.findBestGrokMatchFromExamples("foo", regex, examples), equalTo(regex)); |
346 | 347 | } |
| 348 | + |
| 349 | + public void testFindBestGrokMatchFromExamplesGivenTruncated() { |
| 350 | + String regex = ".*?BST.+?dave.+?bank3.+?CONTEXT.+?SQL.+?statement.+?SELECT.+?time_series_ids_tmp\\.evidence_id" + |
| 351 | + ".+?time_series_ids_tmp\\.time_series_id.+?is_delta.+?GREATEST.+?usual_interval.+?FROM.+?time_series_ids_tmp.+?" + |
| 352 | + "WHERE.+?found_peak_value.+?FALSE.+?ORDER.+?BY.+?time_series_ids_tmp\\.magnitude.+?DESC.+?" + |
| 353 | + "time_series_ids_tmp\\.scaling_factor.+?DESC.+?time_series_ids_tmp\\.significance.+?DESC.+?" + |
| 354 | + "time_series_ids_tmp\\.evidence_id.+?DESC.+?LIMIT.+?PL.+?pgSQL.+?function.+?probable_cause_list_common.+?" + |
| 355 | + "integer.+?integer.+?integer.+?line.+?at.+?SQL.+?statement.+?SQL.+?statement.+?SELECT.+?" + |
| 356 | + "probable_cause_list_common.+?evidenceIdIn.+?linkGroupId.+?timeSpanSeconds.+?PL.+?pgSQL.+?function.+?" + |
| 357 | + "probable_cause_list.+?integer.+?integer.+?line.+?at.+?PERFORM.*"; |
| 358 | + Collection<String> examples = Collections.singletonList("2013-05-16 12:13:45 BST:192.168.61.59(51438):dave:@bank3:[19084]: " + |
| 359 | + "CONTEXT: SQL statement \"SELECT\n" + |
| 360 | + " time_series_ids_tmp.evidence_id,\n" + |
| 361 | + " time_series_ids_tmp.time_series_id,\n" + |
| 362 | + " is_delta,\n" + |
| 363 | + " GREATEST(usual_interval, 1)\n" + |
| 364 | + " FROM\n" + |
| 365 | + " time_series_ids_tmp\n" + |
| 366 | + " WHERE\n" + |
| 367 | + " found_peak_value = FALSE\n" + |
| 368 | + " ORDER BY\n" + |
| 369 | + " \n" + |
| 370 | + " \n" + |
| 371 | + " \n" + |
| 372 | + " time_series_ids_tmp.magnitude DESC,\n" + |
| 373 | + " time_series_ids_tmp.scaling_factor DESC,\n" + |
| 374 | + " time_series_ids_tmp.significance DESC,\n" + |
| 375 | + " time_series_ids_tmp.evidence_id DESC\n" + |
| 376 | + " LIMIT\n" + |
| 377 | + " 1\"\n" + |
| 378 | + " PL/pgSQL function probable_cause_list_common(integer,integer,integer) line 255 at SQL statement\n" + |
| 379 | + " SQL statement \"SELECT probable_cause_list_common(evidenceIdIn, linkGroupId, timeSpanSeconds)\"\n" + |
| 380 | + " PL/pgSQL function probable_cause_list..."); |
| 381 | + // Truncated-example case: the example stops at "probable_cause_list...", while the regex still requires the |
| 382 | + // later pieces "integer ... integer ... line ... at ... PERFORM" from beyond the truncation point, so the regex |
| 383 | + // doesn't match the example. Our algorithm for converting examples to Grok patterns that pick out useful fields |
| 384 | + assertThat(GrokPatternCreator.findBestGrokMatchFromExamples("foo", regex, examples), equalTo(regex)); |
| 385 | + } |
347 | 386 | } |
0 commit comments