diff --git a/lib/fluent/plugin/parser_syslog.rb b/lib/fluent/plugin/parser_syslog.rb index 2d63b5bfb4..9478ffbfd0 100644 --- a/lib/fluent/plugin/parser_syslog.rb +++ b/lib/fluent/plugin/parser_syslog.rb @@ -66,6 +66,7 @@ def initialize @space_count = nil @space_count_rfc5424 = nil @skip_space_count = false + @skip_space_count_rfc5424 = false end def configure(conf) @@ -88,11 +89,18 @@ class << self end RFC3164_WITHOUT_TIME_AND_PRI_REGEXP when :rfc5424 - class << self - alias_method :parse, :parse_rfc5424_regex + if @regexp_parser + class << self + alias_method :parse, :parse_rfc5424_regex + end + else + class << self + alias_method :parse, :parse_rfc5424 + end end @time_format = @rfc5424_time_format unless conf.has_key?('time_format') @support_rfc5424_without_subseconds = true + @skip_space_count_rfc5424 = @time_format.count(' ').zero? RFC5424_WITHOUT_TIME_AND_PRI_REGEXP when :auto class << self @@ -100,6 +108,7 @@ class << self end @time_parser_rfc3164 = time_parser_create(format: @time_format) @time_parser_rfc5424 = time_parser_create(format: @rfc5424_time_format) + @skip_space_count_rfc5424 = @rfc5424_time_format.count(' ').zero? 
nil end @@ -127,7 +136,11 @@ def parse_auto(text, &block) @regexp = RFC5424_WITHOUT_TIME_AND_PRI_REGEXP @time_parser = @time_parser_rfc5424 @support_rfc5424_without_subseconds = true - parse_rfc5424_regex(text, &block) + if @regexp_parser + parse_rfc5424_regex(text, &block) + else + parse_rfc5424(text, &block) + end else @regexp = RFC3164_WITHOUT_TIME_AND_PRI_REGEXP @time_parser = @time_parser_rfc3164 @@ -350,6 +363,133 @@ def parse_rfc3164(text, &block) yield time, record end + + NILVALUE = '-'.freeze + + def parse_rfc5424(text, &block) + pri = nil + cursor = 0 + if @with_priority + if text.start_with?('<'.freeze) + i = text.index('>'.freeze, 1) + if i.nil? || i < 2 + yield nil, nil + return + end + pri = text.slice(1, i - 1).to_i + i = text.index(SPLIT_CHAR, i) + cursor = i + 1 + else + yield nil, nil + return + end + end + + # timestamp part + if @skip_space_count_rfc5424 + i = text.index(SPLIT_CHAR, cursor) + time_str = text.slice(cursor, i - cursor) + cursor = i + 1 + else + i = cursor - 1 + sq = false + @space_count.times do + while text[i + 1] == SPLIT_CHAR + sq = true + i += 1 + end + i = text.index(SPLIT_CHAR, i + 1) + end + + time_str = sq ? 
text.slice(cursor, i - cursor).squeeze(SPLIT_CHAR) : text.slice(cursor, i - cursor) + cursor = i + 1 + end + + # Repeat same code for the performance + + # host part + i = text.index(SPLIT_CHAR, cursor) + unless i + yield nil, nil + return + end + slice_size = i - cursor + host = text.slice(cursor, slice_size) + cursor += slice_size + 1 + + # ident part + i = text.index(SPLIT_CHAR, cursor) + unless i + yield nil, nil + return + end + slice_size = i - cursor + ident = text.slice(cursor, slice_size) + cursor += slice_size + 1 + + # pid part + i = text.index(SPLIT_CHAR, cursor) + unless i + yield nil, nil + return + end + slice_size = i - cursor + pid = text.slice(cursor, slice_size) + cursor += slice_size + 1 + + # msgid part + i = text.index(SPLIT_CHAR, cursor) + unless i + yield nil, nil + return + end + slice_size = i - cursor + msgid = text.slice(cursor, slice_size) + cursor += slice_size + 1 + + record = {'host' => host, 'ident' => ident, 'pid' => pid, 'msgid' => msgid} + record['pri'] = pri if pri + + # extradata part + ed_start = text[cursor] + if ed_start == NILVALUE + record['extradata'] = NILVALUE + cursor += 1 + else + start = cursor + i = text.index('] '.freeze, cursor) + extradata = if i + diff = i + 1 - start # calculate ']' position + cursor += diff + text.slice(start, diff) + else # No message part case + cursor = text.bytesize + text.slice(start, cursor) + end + extradata.tr!("\\".freeze, ''.freeze) + record['extradata'] = extradata + end + + # message part + if cursor != text.bytesize + msg = text[cursor + 1..-1] + msg.chomp! 
+ record['message'] = msg + end + + time = begin + @time_parser.parse(time_str) + rescue Fluent::TimeParser::TimeParseError => e + if @support_rfc5424_without_subseconds + @time_parser_rfc5424_without_subseconds.parse(time_str) + else + raise + end + end + record['time'] = time_str if @keep_time_key + + yield time, record + end end end end diff --git a/test/plugin/test_parser_syslog.rb b/test/plugin/test_parser_syslog.rb index 2c503ad3a3..45cab4e597 100644 --- a/test/plugin/test_parser_syslog.rb +++ b/test/plugin/test_parser_syslog.rb @@ -199,11 +199,13 @@ def test_parse_various_characters_for_tag_with_priority(param) end class TestRFC5424Regexp < self - def test_parse_with_rfc5424_message + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_message(param) @parser.configure( 'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'message_format' => 'rfc5424', 'with_priority' => true, + 'parser_type' => param ) text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!' 
@parser.instance.parse(text) do |time, record| @@ -216,11 +218,13 @@ def test_parse_with_rfc5424_message assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format']) end - def test_parse_with_rfc5424_message_trailing_eol + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_message_trailing_eol(param) @parser.configure( 'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'message_format' => 'rfc5424', 'with_priority' => true, + 'parser_type' => param ) text = "<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!\n" @parser.instance.parse(text) do |time, record| @@ -233,11 +237,13 @@ def test_parse_with_rfc5424_message_trailing_eol assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format']) end - def test_parse_with_rfc5424_multiline_message + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_multiline_message(param) @parser.configure( 'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'message_format' => 'rfc5424', 'with_priority' => true, + 'parser_type' => param ) text = "<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi,\nfrom\nFluentd!" @parser.instance.parse(text) do |time, record| @@ -250,10 +256,12 @@ def test_parse_with_rfc5424_multiline_message assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format']) end - def test_parse_with_rfc5424_message_and_without_priority + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_message_and_without_priority(param) @parser.configure( 'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'message_format' => 'rfc5424', + 'parser_type' => param ) text = '2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!' 
@parser.instance.parse(text) do |time, record| @@ -266,10 +274,12 @@ def test_parse_with_rfc5424_message_and_without_priority assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format']) end - def test_parse_with_rfc5424_empty_message_and_without_priority + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_empty_message_and_without_priority(param) @parser.configure( 'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'message_format' => 'rfc5424', + 'parser_type' => param ) text = '2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - -' @parser.instance.parse(text) do |time, record| @@ -282,10 +292,12 @@ def test_parse_with_rfc5424_empty_message_and_without_priority assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format']) end - def test_parse_with_rfc5424_message_without_time_format + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_message_without_time_format(param) @parser.configure( 'message_format' => 'rfc5424', 'with_priority' => true, + 'parser_type' => param ) text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!' 
@parser.instance.parse(text) do |time, record| @@ -297,10 +309,12 @@ def test_parse_with_rfc5424_message_without_time_format end end - def test_parse_with_rfc5424_message_with_priority_and_pid + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_message_with_priority_and_pid(param) @parser.configure( 'message_format' => 'rfc5424', 'with_priority' => true, + 'parser_type' => param ) text = '<28>1 2018-09-26T15:54:26.620412+09:00 machine minissdpd 1298 - - peer 192.168.0.5:50123 is not from a LAN' @parser.instance.parse(text) do |time, record| @@ -312,11 +326,13 @@ def test_parse_with_rfc5424_message_with_priority_and_pid end end - def test_parse_with_rfc5424_structured_message + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_structured_message(param) @parser.configure( 'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'message_format' => 'rfc5424', 'with_priority' => true, + 'parser_type' => param ) text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"] [Hi] from Fluentd!' @parser.instance.parse(text) do |time, record| @@ -329,11 +345,13 @@ def test_parse_with_rfc5424_structured_message end end - def test_parse_with_rfc5424_multiple_structured_message + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_multiple_structured_message(param) @parser.configure( 'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'message_format' => 'rfc5424', 'with_priority' => true, + 'parser_type' => param ) text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"][exampleSDID@20224 class="high"] Hi, from Fluentd!' 
@parser.instance.parse(text) do |time, record| @@ -346,11 +364,13 @@ def test_parse_with_rfc5424_multiple_structured_message end end - def test_parse_with_rfc5424_message_includes_right_bracket + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_message_includes_right_bracket(param) @parser.configure( 'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'message_format' => 'rfc5424', 'with_priority' => true, + 'parser_type' => param ) text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"] [Hi] from Fluentd]!' @parser.instance.parse(text) do |time, record| @@ -363,11 +383,13 @@ def test_parse_with_rfc5424_message_includes_right_bracket end end - def test_parse_with_rfc5424_empty_message + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_empty_message(param) @parser.configure( 'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'message_format' => 'rfc5424', 'with_priority' => true, + 'parser_type' => param ) text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"]' @parser.instance.parse(text) do |time, record| @@ -380,10 +402,35 @@ def test_parse_with_rfc5424_empty_message end end - def test_parse_with_rfc5424_message_without_subseconds + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_space_empty_message(param) + @parser.configure( + 'message_format' => 'rfc5424', + 'with_priority' => true, + 'parser_type' => param + ) + text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"] ' + @parser.instance.parse(text) do |time, record| + if param == 'string' + assert_equal(event_time("2017-02-06T13:14:15.003Z", format: '%Y-%m-%dT%H:%M:%S.%L%z'), time) + assert_equal "11111", record["pid"] + assert_equal "ID24224", record["msgid"] + assert_equal 
"[exampleSDID@20224 iut=\"3\" eventSource=\"Application\" eventID=\"11211\"]", + record["extradata"] + assert_equal '', record["message"] + else + assert_nil time + assert_nil record + end + end + end + + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_message_without_subseconds(param) @parser.configure( 'message_format' => 'rfc5424', 'with_priority' => true, + 'parser_type' => param ) text = '<16>1 2017-02-06T13:14:15Z 192.168.0.1 fluentd - - - Hi, from Fluentd!' @parser.instance.parse(text) do |time, record| @@ -395,10 +442,12 @@ def test_parse_with_rfc5424_message_without_subseconds end end - def test_parse_with_rfc5424_message_both_timestamp + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_message_both_timestamp(param) @parser.configure( 'message_format' => 'rfc5424', 'with_priority' => true, + 'parser_type' => param ) text = '<16>1 2017-02-06T13:14:15Z 192.168.0.1 fluentd - - - Hi, from Fluentd!' @parser.instance.parse(text) do |time, record|