diff --git a/lib/fluent/plugin/parser_syslog.rb b/lib/fluent/plugin/parser_syslog.rb index 9387027458..cd3da593a8 100644 --- a/lib/fluent/plugin/parser_syslog.rb +++ b/lib/fluent/plugin/parser_syslog.rb @@ -38,6 +38,10 @@ class SyslogParser < Parser config_param :message_format, :enum, list: [:rfc3164, :rfc5424, :auto], default: :rfc3164 desc 'Specify time format for event time for rfc5424 protocol' config_param :rfc5424_time_format, :string, default: "%Y-%m-%dT%H:%M:%S.%L%z" + desc 'The parser type used to parse syslog message' + config_param :parser_type, :enum, list: [:regexp, :string], default: :regexp + desc 'support colonless ident in string parser' + config_param :support_colonless_ident, :bool, default: true def initialize super @@ -50,10 +54,17 @@ def configure(conf) @time_parser_rfc3164 = @time_parser_rfc5424 = nil @time_parser_rfc5424_without_subseconds = nil @support_rfc5424_without_subseconds = false + @regexp_parser = @parser_type == :regexp @regexp = case @message_format when :rfc3164 - class << self - alias_method :parse, :parse_plain + if @regexp_parser + class << self + alias_method :parse, :parse_plain + end + else + class << self + alias_method :parse, :parse_rfc3164 + end end @with_priority ? REGEXP_WITH_PRI : REGEXP when :rfc5424 @@ -88,11 +99,16 @@ def parse_auto(text, &block) @regexp = @with_priority ? REGEXP_RFC5424_WITH_PRI : REGEXP_RFC5424 @time_parser = @time_parser_rfc5424 @support_rfc5424_without_subseconds = true + parse_plain(text, &block) else @regexp = @with_priority ? 
REGEXP_WITH_PRI : REGEXP
           @time_parser = @time_parser_rfc3164
+          if @regexp_parser
+            parse_plain(text, &block)
+          else
+            parse_rfc3164(text, &block)
+          end
         end
-        parse_plain(text, &block)
       end
 
       def parse_plain(text, &block)
@@ -137,6 +153,105 @@ def parse_plain(text, &block)
 
         yield time, record
       end
+
+      SPLIT_CHAR = ' '.freeze
+
+      # Parse an RFC 3164 message by scanning the string instead of matching a regexp.
+      #
+      # Yields (time, record) on success, or (nil, nil) when the priority, the
+      # timestamp or the host field cannot be located. The record always has
+      # 'host' and 'message'; 'pri', 'ident', 'pid' and 'time' are set when present.
+      def parse_rfc3164(text, &block)
+        pri = nil
+        cursor = 0
+        if @with_priority
+          if text.start_with?('<'.freeze)
+            i = text.index('>'.freeze, 1)
+            # i is nil when '>' is missing and 1 when the priority is empty ('<>')
+            if i.nil? || i < 2
+              yield nil, nil
+              return
+            end
+            pri = text.slice(1, i - 1).to_i
+            cursor = i + 1
+          else
+            yield nil, nil
+            return
+          end
+        end
+
+        # header part
+        time_size = 15 # skip Mmm dd hh:mm:ss
+        time_end = text[cursor + time_size]
+        if time_end == SPLIT_CHAR
+          time_str = text.slice(cursor, time_size)
+          cursor += 16 # time + ' '
+        elsif time_end == '.'.freeze
+          # support subsecond time; search from the end of the timestamp rather
+          # than from an absolute offset so a leading <pri> of any width works
+          i = text.index(SPLIT_CHAR, cursor + time_size)
+          if i.nil?
+            yield nil, nil
+            return
+          end
+          time_str = text.slice(cursor, i - cursor)
+          cursor = i + 1
+        else
+          yield nil, nil
+          return
+        end
+
+        i = text.index(SPLIT_CHAR, cursor)
+        if i.nil?
+          yield nil, nil
+          return
+        end
+        host_size = i - cursor
+        host = text.slice(cursor, host_size)
+        cursor += host_size + 1
+
+        record = {'host' => host}
+        record['pri'] = pri if pri
+
+        i = text.index(SPLIT_CHAR, cursor)
+
+        # message part
+        msg = if i.nil? # for 'only non-space content case'
+                text.slice(cursor, text.bytesize)
+              else
+                if text[i - 1] == ':'.freeze
+                  # '[' only counts as the pid opener when it sits inside the tag (before i)
+                  left_braket_pos = text[i - 2] == ']'.freeze ? text.index('['.freeze, cursor) : nil
+                  if left_braket_pos && left_braket_pos < i
+                    record['ident'] = text.slice(cursor, left_braket_pos - cursor)
+                    record['pid'] = text.slice(left_braket_pos + 1, i - left_braket_pos - 3) # remove '[' / ']:'
+                  else
+                    record['ident'] = text.slice(cursor, i - cursor - 1)
+                  end
+                  text.slice(i + 1, text.bytesize)
+                else
+                  if @support_colonless_ident
+                    left_braket_pos = text[i - 1] == ']'.freeze ? text.index('['.freeze, cursor) : nil
+                    if left_braket_pos && left_braket_pos < i
+                      record['ident'] = text.slice(cursor, left_braket_pos - cursor)
+                      record['pid'] = text.slice(left_braket_pos + 1, i - left_braket_pos - 2) # remove '[' / ']'
+                    else
+                      record['ident'] = text.slice(cursor, i - cursor)
+                    end
+                    text.slice(i + 1, text.bytesize)
+                  else
+                    text.slice(cursor, text.bytesize)
+                  end
+                end
+              end
+        msg.chomp!
+        record['message'] = msg
+
+        time = @time_parser.parse(time_str)
+        record['time'] = time_str if @keep_time_key
+
+        yield time, record
+      end
     end
   end
 end
diff --git a/test/plugin/test_parser_syslog.rb b/test/plugin/test_parser_syslog.rb
index 58834843d4..841e2a3cdd 100644
--- a/test/plugin/test_parser_syslog.rb
+++ b/test/plugin/test_parser_syslog.rb
@@ -14,8 +14,9 @@ def setup
     }
   end
 
-  def test_parse
-    @parser.configure({})
+  data('regexp' => 'regexp', 'string' => 'string')
+  def test_parse(param)
+    @parser.configure({'parser_type' => param})
     @parser.instance.parse('Feb 28 12:00:00 192.168.0.1 fluentd[11111]: [error] Syslog test') { |time, record|
       assert_equal(event_time('Feb 28 12:00:00', format: '%b %d %H:%M:%S'), time)
       assert_equal(@expected, record)
@@ -24,8 +25,9 @@ def test_parse
     assert_equal("%b %d %H:%M:%S", @parser.instance.patterns['time_format'])
   end
 
-  def test_parse_with_time_format
-    @parser.configure('time_format' => '%b %d %M:%S:%H')
+  data('regexp' => 'regexp', 'string' => 'string')
+  def test_parse_with_time_format(param)
+    @parser.configure('time_format' => 
'%b %d %M:%S:%H', 'parser_type' => param) @parser.instance.parse('Feb 28 00:00:12 192.168.0.1 fluentd[11111]: [error] Syslog test') { |time, record| assert_equal(event_time('Feb 28 12:00:00', format: '%b %d %H:%M:%S'), time) assert_equal(@expected, record) @@ -33,8 +35,18 @@ def test_parse_with_time_format assert_equal('%b %d %M:%S:%H', @parser.instance.patterns['time_format']) end - def test_parse_with_priority - @parser.configure('with_priority' => true) + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_subsecond_time(param) + @parser.configure('time_format' => '%b %d %H:%M:%S.%N', 'parser_type' => param) + @parser.instance.parse('Feb 28 12:00:00.456 192.168.0.1 fluentd[11111]: [error] Syslog test') { |time, record| + assert_equal(event_time('Feb 28 12:00:00.456', format: '%b %d %H:%M:%S.%N'), time) + assert_equal(@expected, record) + } + end + + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_priority(param) + @parser.configure('with_priority' => true, 'parser_type' => param) @parser.instance.parse('<6>Feb 28 12:00:00 192.168.0.1 fluentd[11111]: [error] Syslog test') { |time, record| assert_equal(event_time('Feb 28 12:00:00', format: '%b %d %H:%M:%S'), time) assert_equal(@expected.merge('pri' => 6), record) @@ -43,8 +55,18 @@ def test_parse_with_priority assert_equal("%b %d %H:%M:%S", @parser.instance.patterns['time_format']) end - def test_parse_without_colon - @parser.configure({}) + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_empty_priority(param) + @parser.configure('with_priority' => true, 'parser_type' => param) + @parser.instance.parse('<>Feb 28 12:00:00 192.168.0.1 fluentd[11111]: [error] Syslog test') { |time, record| + assert_nil time + assert_nil record + } + end + + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_without_colon(param) + @parser.configure({'parser_type' => param}) @parser.instance.parse('Feb 28 12:00:00 192.168.0.1 fluentd[11111] [error] Syslog 
test') { |time, record| assert_equal(event_time('Feb 28 12:00:00', format: '%b %d %H:%M:%S'), time) assert_equal(@expected, record) @@ -53,10 +75,12 @@ def test_parse_without_colon assert_equal("%b %d %H:%M:%S", @parser.instance.patterns['time_format']) end - def test_parse_with_keep_time_key + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_keep_time_key(param) @parser.configure( 'time_format' => '%b %d %M:%S:%H', 'keep_time_key'=>'true', + 'parser_type' => param ) text = 'Feb 28 00:00:12 192.168.0.1 fluentd[11111]: [error] Syslog test' @parser.instance.parse(text) do |time, record| @@ -64,24 +88,87 @@ def test_parse_with_keep_time_key end end - def test_parse_various_characters_for_tag + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_various_characters_for_tag(param) ident = '~!@#$%^&*()_+=-`]{};"\'/?\\,.<>' - @parser.configure({}) + @parser.configure({'parser_type' => param}) @parser.instance.parse("Feb 28 12:00:00 192.168.0.1 #{ident}[11111]: [error] Syslog test") { |time, record| assert_equal(event_time('Feb 28 12:00:00', format: '%b %d %H:%M:%S'), time) assert_equal(@expected.merge('ident' => ident), record) } end - def test_parse_various_characters_for_tag_with_priority + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_various_characters_for_tag_with_priority(param) ident = '~!@#$%^&*()_+=-`]{};"\'/?\\,.<>' - @parser.configure('with_priority' => true) + @parser.configure('with_priority' => true, 'parser_type' => param) @parser.instance.parse("<6>Feb 28 12:00:00 192.168.0.1 #{ident}[11111]: [error] Syslog test") { |time, record| assert_equal(event_time('Feb 28 12:00:00', format: '%b %d %H:%M:%S'), time) assert_equal(@expected.merge('pri' => 6, 'ident' => ident), record) } end + sub_test_case 'Check the difference of regexp and string parser' do + # examples from rfc3164 + data('regexp' => 'regexp', 'string' => 'string') + test 'wrong result with no ident message by default' do |param| + 
@parser.configure('parser_type' => param) + @parser.instance.parse('Feb 5 17:32:18 10.0.0.99 Use the BFG!') { |time, record| + assert_equal({'host' => '10.0.0.99', 'ident' => 'Use', 'message' => 'the BFG!'}, record) + } + end + + test "proper result with no ident message by 'support_colonless_ident false'" do + @parser.configure('parser_type' => 'string', 'support_colonless_ident' => false) + @parser.instance.parse('Feb 5 17:32:18 10.0.0.99 Use the BFG!') { |time, record| + assert_equal({'host' => '10.0.0.99', 'message' => 'Use the BFG!'}, record) + } + end + + test "string parsers can't parse broken syslog message and generate wrong record" do + @parser.configure('parser_type' => 'string') + @parser.instance.parse("1990 Oct 22 10:52:01 TZ-6 scapegoat.dmz.example.org 10.1.2.32 sched[0]: That's All Folks!") { |time, record| + expected = {'host' => 'scapegoat.dmz.example.org', 'ident' => 'sched', 'pid' => '0', 'message' => "That's All Folks!"} + assert_not_equal(expected, record) + } + end + + test "regexp parsers can't parse broken syslog message and raises an error" do + @parser.configure('parser_type' => 'regexp') + assert_raise(Fluent::TimeParser::TimeParseError) { + @parser.instance.parse("1990 Oct 22 10:52:01 TZ-6 scapegoat.dmz.example.org 10.1.2.32 sched[0]: That's All Folks!") { |time, record| } + } + end + + data('regexp' => 'regexp', 'string' => 'string') + test "':' included message breaks regexp parser" do |param| + @parser.configure('parser_type' => param) + @parser.instance.parse('Aug 10 12:00:00 127.0.0.1 test foo:bar') { |time, record| + expected = {'host' => '127.0.0.1', 'ident' => 'test', 'message' => 'foo:bar'} + if param == 'string' + assert_equal(expected, record) + else + assert_not_equal(expected, record) + end + } + end + + data('regexp' => 'regexp', 'string' => 'string') + test "Only no whitespace content in MSG causes different result" do |param| + @parser.configure('parser_type' => param) + @parser.instance.parse('Aug 10 12:00:00 127.0.0.1 
value1,value2,value3,value4') { |time, record| + # 'message' is correct but regexp set it as 'ident' + if param == 'string' + expected = {'host' => '127.0.0.1', 'message' => 'value1,value2,value3,value4'} + assert_equal(expected, record) + else + expected = {'host' => '127.0.0.1', 'ident' => 'value1,value2,value3,value4', 'message' => ''} + assert_equal(expected, record) + end + } + end + end + class TestRFC5424Regexp < self def test_parse_with_rfc5424_message @parser.configure( @@ -273,10 +360,12 @@ def test_parse_with_rfc5424_message_both_timestamp end class TestAutoRegexp < self - def test_auto_with_legacy_syslog_message + data('regexp' => 'regexp', 'string' => 'string') + def test_auto_with_legacy_syslog_message(param) @parser.configure( 'time_format' => '%b %d %M:%S:%H', 'message_format' => 'auto', + 'parser_type' => param ) text = 'Feb 28 00:00:12 192.168.0.1 fluentd[11111]: [error] Syslog test' @parser.instance.parse(text) do |time, record| @@ -286,11 +375,13 @@ def test_auto_with_legacy_syslog_message assert_equal(Fluent::Plugin::SyslogParser::REGEXP, @parser.instance.patterns['format']) end - def test_auto_with_legacy_syslog_priority_message + data('regexp' => 'regexp', 'string' => 'string') + def test_auto_with_legacy_syslog_priority_message(param) @parser.configure( 'time_format' => '%b %d %M:%S:%H', 'with_priority' => true, 'message_format' => 'auto', + 'parser_type' => param ) text = '<6>Feb 28 12:00:00 192.168.0.1 fluentd[11111]: [error] Syslog test' @parser.instance.parse(text) do |time, record| @@ -300,11 +391,13 @@ def test_auto_with_legacy_syslog_priority_message assert_equal(Fluent::Plugin::SyslogParser::REGEXP_WITH_PRI, @parser.instance.patterns['format']) end - def test_parse_with_rfc5424_message + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_message(param) @parser.configure( 'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'message_format' => 'auto', 'with_priority' => true, + 'parser_type' => param ) text = '<16>1 
2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!' @parser.instance.parse(text) do |time, record| @@ -318,11 +411,13 @@ def test_parse_with_rfc5424_message @parser.instance.patterns['format']) end - def test_parse_with_rfc5424_structured_message + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_structured_message(param) @parser.configure( 'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'message_format' => 'auto', 'with_priority' => true, + 'parser_type' => param ) text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"] Hi, from Fluentd!' @parser.instance.parse(text) do |time, record| @@ -337,12 +432,14 @@ def test_parse_with_rfc5424_structured_message @parser.instance.patterns['format']) end - def test_parse_with_both_message_type + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_both_message_type(param) @parser.configure( 'time_format' => '%b %d %M:%S:%H', 'rfc5424_time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'message_format' => 'auto', 'with_priority' => true, + 'parser_type' => param ) text = '<1>Feb 28 12:00:00 192.168.0.1 fluentd[11111]: [error] Syslog test' @parser.instance.parse(text) do |time, record| @@ -382,12 +479,14 @@ def test_parse_with_both_message_type @parser.instance.patterns['format']) end - def test_parse_with_both_message_type_and_priority + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_both_message_type_and_priority(param) @parser.configure( 'time_format' => '%b %d %M:%S:%H', 'rfc5424_time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'with_priority' => true, 'message_format' => 'auto', + 'parser_type' => param ) text = '<6>Feb 28 12:00:00 192.168.0.1 fluentd[11111]: [error] Syslog test' @parser.instance.parse(text) do |time, record|