|  | 
| 15 | 15 | # | 
| 16 | 16 | 
 | 
| 17 | 17 | import re | 
|  | 18 | +from collections import OrderedDict | 
|  | 19 | + | 
|  | 20 | + | 
|  | 21 | +supported_headers = OrderedDict( | 
|  | 22 | +    [ | 
|  | 23 | +        ( | 
|  | 24 | +            "CEF", | 
|  | 25 | +            { | 
|  | 26 | +                "regex": r"\s(CEF:\d\|[^\|]+\|([^\|]+)\|[^\|]+\|[^\|]+\|[^\|]+\|([^\|]+)\|(.*))", | 
|  | 27 | +                "match_element": 1, | 
|  | 28 | +            }, | 
|  | 29 | +        ), | 
|  | 30 | +        ( | 
|  | 31 | +            "CEF_checkpoint", | 
|  | 32 | +            { | 
|  | 33 | +                "regex": r"(time=\d+\|[^\|]+\|([^\|]+)\|[^\|]+\|[^\|]+\|[^\|]+\|([^\|]+)\|(.*))", | 
|  | 34 | +                "match_element": 1, | 
|  | 35 | +            }, | 
|  | 36 | +        ), | 
|  | 37 | +        ( | 
|  | 38 | +            "rfc5424", | 
|  | 39 | +            { | 
|  | 40 | +                "regex": r"(?:(\d{4}[-]\d{2}[-]\d{2}[T]\d{2}[:]\d{2}[:]\d{2}(?:\.\d{1,6})?(?:[+-]\d{2}[:]\d{2}|Z)?))\s(?:([\w][\w\d\.@-]*)|-)\s(.*)$", | 
|  | 41 | +                "match_element": 3, | 
|  | 42 | +            }, | 
|  | 43 | +        ), | 
|  | 44 | +        ( | 
|  | 45 | +            "rfc3164", | 
|  | 46 | +            { | 
|  | 47 | +                "regex": r"([A-Z][a-z][a-z]\s{1,2}\d{1,2}(?:\s\d{4})?\s\d{2}[:]\d{2}[:]\d{2})\s+([\w][\w\d\.@-]*)\s\w*:?(.*)$", | 
|  | 48 | +                "match_element": 3, | 
|  | 49 | +            }, | 
|  | 50 | +        ), | 
|  | 51 | +        ( | 
|  | 52 | +            "httpd", | 
|  | 53 | +            { | 
|  | 54 | +                "regex": r"((?:\d+(?:(?:\.|:)(?:\d+|[a-fA-F]+)?){3,8}))(?:\s(?:-|\w+))*\s\[(\d{1,2}\/\w+\/\d{4}(?:[:]\d{2}){3}(?:\.\d{1,6})?(?:\s[+-]\d{2}[:]?\d{2})?(?:Z)?)]\s(.*)$", | 
|  | 55 | +                "match_element": 3, | 
|  | 56 | +            }, | 
|  | 57 | +        ), | 
|  | 58 | +    ] | 
|  | 59 | +) | 
| 18 | 60 | 
 | 
| 19 | 61 | 
 | 
| 20 | 62 | def escape_char_event(event): | 
| @@ -71,35 +113,13 @@ def escape_char_event(event): | 
| 71 | 113 | 
 | 
| 72 | 114 | 
 | 
| 73 | 115 | def strip_syslog_header(raw_event): | 
|  | 116 | +    """ | 
|  | 117 | +    Removes the syslog header and returns the event without it. To support a new header type, add it to supported_headers. | 
|  | 118 | +    Input: raw event | 
|  | 119 | +    """ | 
| 74 | 120 |     # remove leading and trailing whitespace | 
| 75 | 121 |     raw_event = raw_event.strip() | 
| 76 |  | -    CEF_format_match = re.search( | 
| 77 |  | -        r"\s(CEF:\d\|[^\|]+\|([^\|]+)\|[^\|]+\|[^\|]+\|[^\|]+\|([^\|]+)\|(.*))", | 
| 78 |  | -        raw_event, | 
| 79 |  | -    ) | 
| 80 |  | -    if CEF_format_match: | 
| 81 |  | -        stripped_header = CEF_format_match.group(1) | 
| 82 |  | -        return stripped_header | 
| 83 |  | -    CEF_checkpoint_match = re.search( | 
| 84 |  | -        r"(time=\d+\|[^\|]+\|([^\|]+)\|[^\|]+\|[^\|]+\|[^\|]+\|([^\|]+)\|(.*))", | 
| 85 |  | -        raw_event, | 
| 86 |  | -    ) | 
| 87 |  | -    if CEF_checkpoint_match: | 
| 88 |  | -        stripped_header = CEF_checkpoint_match.group(1) | 
| 89 |  | -        return stripped_header | 
| 90 |  | -    regex_rfc5424 = re.search( | 
| 91 |  | -        r"(?:(\d{4}[-]\d{2}[-]\d{2}[T]\d{2}[:]\d{2}[:]\d{2}(?:\.\d{1,6})?(?:[+-]\d{2}[:]\d{2}|Z)?)|-)\s(?:([\w][\w\d\.@-]*)|-)\s(.*)$", | 
| 92 |  | -        raw_event, | 
| 93 |  | -    ) | 
| 94 |  | -    if regex_rfc5424: | 
| 95 |  | -        stripped_header = regex_rfc5424.group(3) | 
| 96 |  | -        return stripped_header | 
| 97 |  | -    regex_rfc3164 = re.search( | 
| 98 |  | -        r"([A-Z][a-z][a-z]\s{1,2}\d{1,2}(?:\s\d{4})?\s\d{2}[:]\d{2}[:]\d{2})\s+([\w][\w\d\.@-]*)\s\w*:?(.*)$", | 
| 99 |  | -        raw_event, | 
| 100 |  | -    ) | 
| 101 |  | -    if regex_rfc3164: | 
| 102 |  | -        stripped_header = regex_rfc3164.group(3) | 
| 103 |  | -        return stripped_header | 
| 104 |  | -    if not (CEF_format_match and regex_rfc3164 and regex_rfc5424): | 
| 105 |  | -        return None | 
|  | 122 | +    for header_format in supported_headers.values(): | 
|  | 123 | +        header_match = re.search(header_format.get("regex"), raw_event) | 
|  | 124 | +        if header_match: | 
|  | 125 | +            return header_match.group(header_format.get("match_element")) | 
0 commit comments