From 68f3ec723c28d24575cb12640ed8cae44debd291 Mon Sep 17 00:00:00 2001 From: Masahiro Nakagawa Date: Tue, 3 Sep 2019 14:42:03 +0900 Subject: [PATCH 1/8] parser_syslog: Implement new parser for syslog rfc3164 Signed-off-by: Masahiro Nakagawa --- lib/fluent/plugin/parser_syslog.rb | 107 ++++++++++++++++++++++++++++- test/plugin/test_parser_syslog.rb | 58 +++++++++++----- 2 files changed, 143 insertions(+), 22 deletions(-) diff --git a/lib/fluent/plugin/parser_syslog.rb b/lib/fluent/plugin/parser_syslog.rb index 9387027458..3b1045e514 100644 --- a/lib/fluent/plugin/parser_syslog.rb +++ b/lib/fluent/plugin/parser_syslog.rb @@ -38,6 +38,10 @@ class SyslogParser < Parser config_param :message_format, :enum, list: [:rfc3164, :rfc5424, :auto], default: :rfc3164 desc 'Specify time format for event time for rfc5424 protocol' config_param :rfc5424_time_format, :string, default: "%Y-%m-%dT%H:%M:%S.%L%z" + desc 'The parser type used to parse syslog message' + config_param :parser_type, :enum, list: [:regexp, :string], default: :regexp + desc 'support colonless ident in string parser' + config_param :support_colonless_ident, :bool, default: true def initialize super @@ -50,10 +54,17 @@ def configure(conf) @time_parser_rfc3164 = @time_parser_rfc5424 = nil @time_parser_rfc5424_without_subseconds = nil @support_rfc5424_without_subseconds = false + @regexp_parser = @parser_type == :regexp @regexp = case @message_format when :rfc3164 - class << self - alias_method :parse, :parse_plain + if @regexp_parser + class << self + alias_method :parse, :parse_plain + end + else + class << self + alias_method :parse, :parse_rfc3164 + end end @with_priority ? REGEXP_WITH_PRI : REGEXP when :rfc5424 @@ -88,11 +99,16 @@ def parse_auto(text, &block) @regexp = @with_priority ? REGEXP_RFC5424_WITH_PRI : REGEXP_RFC5424 @time_parser = @time_parser_rfc5424 @support_rfc5424_without_subseconds = true + parse_plain(text, &block) else @regexp = @with_priority ? REGEXP_WITH_PRI : REGEXP @time_parser = @time_parser_rfc3164 + if @regexp_parser + parse_plain(text, &block) + else + parse_rfc3164(text, &block) + end end - parse_plain(text, &block) end def parse_plain(text, &block) @@ -137,6 +153,91 @@ def parse_plain(text, &block) yield time, record end + + SPLIT_CHAR = ' '.freeze + PRI_START_CHAR = '<'.freeze + + def parse_rfc3164(text, &block) + pri = nil + start = 0 + if @with_priority + if text.start_with?(PRI_START_CHAR) + i = text.index('>'.freeze, 1) + pri = text.slice(1, i - 1).to_i + start = i + 1 + else + yield nil, nil + return + end + end + + # header part + diff = 15 # skip Mmm dd hh:mm:ss + time_end = text[start + diff] + if time_end == SPLIT_CHAR + time_str = text.slice(start, diff) + start += 16 # time + ' ' + elsif time_end == '.'.freeze + # support subsecond time + i = text.index(SPLIT_CHAR, diff) + time_str = text.slice(start, i - start) + start = i + 1 + else + yield nil, nil + return + end + + i = text.index(SPLIT_CHAR, start) + if i.nil? + yield nil, nil + return + end + diff = i - start + host = text.slice(start, diff) + start += (diff + 1) + + i = text.index(SPLIT_CHAR, start) + if i.nil? + yield nil, nil + return + end + diff = i - start + + record = {'host' => host} + record['pri'] = pri if pri + + # message part + msg = if text[i - 1] == ':'.freeze + if text[i - 2] == ']'.freeze + j = text.index('['.freeze, start) + record['ident'] = text.slice(start, j - start) + record['pid'] = text.slice(j + 1, i - j - 3) # remove '[' / ']:' + else + record['ident'] = text.slice(start, i - start - 1) + end + text.slice(i + 1, text.bytesize) + else + if @support_colonless_ident + if text[i - 1] == ']'.freeze + j = text.index('['.freeze, start) + record['ident'] = text.slice(start, j - start) + record['pid'] = text.slice(j + 1, i - j - 2) # remove '[' / ']' + else + record['ident'] = text.slice(start, i - start) + end + text.slice(i + 1, text.bytesize) + else + text.slice(i - diff, text.bytesize) + end + end + msg.chomp! + record['message'] = msg + + time = @time_parser.parse(time_str.squeeze(SPLIT_CHAR)) + record['time'] = time_str if @keep_time_key + + yield time, record + end end end end diff --git a/test/plugin/test_parser_syslog.rb b/test/plugin/test_parser_syslog.rb index 58834843d4..252077a8b7 100644 --- a/test/plugin/test_parser_syslog.rb +++ b/test/plugin/test_parser_syslog.rb @@ -14,8 +14,9 @@ def setup } end - def test_parse - @parser.configure({}) + data('regexp' => 'regexp', 'string' => 'string') + def test_parse(param) + @parser.configure({'parser_type' => param}) @parser.instance.parse('Feb 28 12:00:00 192.168.0.1 fluentd[11111]: [error] Syslog test') { |time, record| assert_equal(event_time('Feb 28 12:00:00', format: '%b %d %H:%M:%S'), time) assert_equal(@expected, record) @@ -24,8 +25,9 @@ def test_parse assert_equal("%b %d %H:%M:%S", @parser.instance.patterns['time_format']) end - def test_parse_with_time_format - @parser.configure('time_format' => '%b %d %M:%S:%H') + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_time_format(param) + @parser.configure('time_format' => '%b %d %M:%S:%H', 'parser_type' => param) @parser.instance.parse('Feb 28 00:00:12 192.168.0.1 fluentd[11111]: [error] Syslog test') { |time, record| assert_equal(event_time('Feb 28 12:00:00', format: '%b %d %H:%M:%S'), time) assert_equal(@expected, record) @@ -33,8 +35,9 @@ def test_parse_with_time_format assert_equal('%b %d %M:%S:%H', @parser.instance.patterns['time_format']) end - def test_parse_with_priority - @parser.configure('with_priority' => true) + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_priority(param) + @parser.configure('with_priority' => true, 'parser_type' => param) @parser.instance.parse('<6>Feb 28 12:00:00 192.168.0.1 fluentd[11111]: [error] Syslog test') { |time, record| assert_equal(event_time('Feb 28 12:00:00', format: '%b %d %H:%M:%S'), time) assert_equal(@expected.merge('pri' => 6), record) @@ -43,8 +46,9 @@ def test_parse_with_priority assert_equal("%b %d %H:%M:%S", @parser.instance.patterns['time_format']) end - def test_parse_without_colon - @parser.configure({}) + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_without_colon(param) + @parser.configure({'parser_type' => param}) @parser.instance.parse('Feb 28 12:00:00 192.168.0.1 fluentd[11111] [error] Syslog test') { |time, record| assert_equal(event_time('Feb 28 12:00:00', format: '%b %d %H:%M:%S'), time) assert_equal(@expected, record) @@ -53,10 +57,12 @@ def test_parse_without_colon assert_equal("%b %d %H:%M:%S", @parser.instance.patterns['time_format']) end - def test_parse_with_keep_time_key + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_keep_time_key(param) @parser.configure( 'time_format' => '%b %d %M:%S:%H', 'keep_time_key'=>'true', + 'parser_type' => param ) text = 'Feb 28 00:00:12 192.168.0.1 fluentd[11111]: [error] Syslog test' @parser.instance.parse(text) do |time, record| @@ -64,18 +70,20 @@ def test_parse_with_keep_time_key end end - def test_parse_various_characters_for_tag + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_various_characters_for_tag(param) ident = '~!@#$%^&*()_+=-`]{};"\'/?\\,.<>' - @parser.configure({}) + @parser.configure({'parser_type' => param}) @parser.instance.parse("Feb 28 12:00:00 192.168.0.1 #{ident}[11111]: [error] Syslog test") { |time, record| assert_equal(event_time('Feb 28 12:00:00', format: '%b %d %H:%M:%S'), time) assert_equal(@expected.merge('ident' => ident), record) } end - def test_parse_various_characters_for_tag_with_priority + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_various_characters_for_tag_with_priority(param) ident = '~!@#$%^&*()_+=-`]{};"\'/?\\,.<>' - @parser.configure('with_priority' => true) + @parser.configure('with_priority' => true, 'parser_type' => param) @parser.instance.parse("<6>Feb 28 12:00:00 192.168.0.1 #{ident}[11111]: [error] Syslog test") { |time, record| assert_equal(event_time('Feb 28 12:00:00', format: '%b %d %H:%M:%S'), time) assert_equal(@expected.merge('pri' => 6, 'ident' => ident), record) @@ -273,10 +281,12 @@ def test_parse_with_rfc5424_message_both_timestamp end class TestAutoRegexp < self - def test_auto_with_legacy_syslog_message + data('regexp' => 'regexp', 'string' => 'string') + def test_auto_with_legacy_syslog_message(param) @parser.configure( 'time_format' => '%b %d %M:%S:%H', 'message_format' => 'auto', + 'parser_type' => param ) text = 'Feb 28 00:00:12 192.168.0.1 fluentd[11111]: [error] Syslog test' @parser.instance.parse(text) do |time, record| @@ -286,11 +296,13 @@ def test_auto_with_legacy_syslog_message assert_equal(Fluent::Plugin::SyslogParser::REGEXP, @parser.instance.patterns['format']) end - def test_auto_with_legacy_syslog_priority_message + data('regexp' => 'regexp', 'string' => 'string') + def test_auto_with_legacy_syslog_priority_message(param) @parser.configure( 'time_format' => '%b %d %M:%S:%H', 'with_priority' => true, 'message_format' => 'auto', + 'parser_type' => param ) text = '<6>Feb 28 12:00:00 192.168.0.1 fluentd[11111]: [error] Syslog test' @parser.instance.parse(text) do |time, record| @@ -300,11 +312,13 @@ def test_auto_with_legacy_syslog_priority_message assert_equal(Fluent::Plugin::SyslogParser::REGEXP_WITH_PRI, @parser.instance.patterns['format']) end - def test_parse_with_rfc5424_message + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_message(param) @parser.configure( 'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'message_format' => 'auto', 'with_priority' => true, + 'parser_type' => param ) text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!' @parser.instance.parse(text) do |time, record| @@ -318,11 +332,13 @@ def test_parse_with_rfc5424_message @parser.instance.patterns['format']) end - def test_parse_with_rfc5424_structured_message + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_rfc5424_structured_message(param) @parser.configure( 'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'message_format' => 'auto', 'with_priority' => true, + 'parser_type' => param ) text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"] Hi, from Fluentd!' @parser.instance.parse(text) do |time, record| @@ -337,12 +353,14 @@ def test_parse_with_rfc5424_structured_message @parser.instance.patterns['format']) end - def test_parse_with_both_message_type + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_both_message_type(param) @parser.configure( 'time_format' => '%b %d %M:%S:%H', 'rfc5424_time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'message_format' => 'auto', 'with_priority' => true, + 'parser_type' => param ) text = '<1>Feb 28 12:00:00 192.168.0.1 fluentd[11111]: [error] Syslog test' @parser.instance.parse(text) do |time, record| @@ -382,12 +400,14 @@ def test_parse_with_both_message_type @parser.instance.patterns['format']) end - def test_parse_with_both_message_type_and_priority + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_both_message_type_and_priority(param) @parser.configure( 'time_format' => '%b %d %M:%S:%H', 'rfc5424_time_format' => '%Y-%m-%dT%H:%M:%S.%L%z', 'with_priority' => true, 'message_format' => 'auto', + 'parser_type' => param ) text = '<6>Feb 28 12:00:00 192.168.0.1 fluentd[11111]: [error] Syslog test' @parser.instance.parse(text) do |time, record| From f8af92490e9797167e4e8e35e828229cf50afbe5 Mon Sep 17 00:00:00 2001 From: Masahiro Nakagawa Date: Tue, 3 Sep 2019 20:17:17 +0900 Subject: [PATCH 2/8] parser_json: Add comparison test for regexp and string parser Signed-off-by: Masahiro Nakagawa --- test/plugin/test_parser_syslog.rb | 55 +++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/test/plugin/test_parser_syslog.rb b/test/plugin/test_parser_syslog.rb index 252077a8b7..e39522cda9 100644 --- a/test/plugin/test_parser_syslog.rb +++ b/test/plugin/test_parser_syslog.rb @@ -35,6 +35,15 @@ def test_parse_with_time_format(param) assert_equal('%b %d %M:%S:%H', @parser.instance.patterns['time_format']) end + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_subsecond_time(param) + @parser.configure('time_format' => '%b %d %H:%M:%S.%N', 'parser_type' => param) + @parser.instance.parse('Feb 28 12:00:00.456 192.168.0.1 fluentd[11111]: [error] Syslog test') { |time, record| + assert_equal(event_time('Feb 28 12:00:00.456', format: '%b %d %H:%M:%S.%N'), time) + assert_equal(@expected, record) + } + end + data('regexp' => 'regexp', 'string' => 'string') def test_parse_with_priority(param) @parser.configure('with_priority' => true, 'parser_type' => param) @@ -90,6 +99,52 @@ def test_parse_various_characters_for_tag_with_priority(param) } end + sub_test_case 'Check the difference of regexp and string parser' do + # examples from rfc3164 + data('regexp' => 'regexp', 'string' => 'string') + test 'wrong result with no ident message by default' do |param| + @parser.configure('parser_type' => param) + @parser.instance.parse('Feb 5 17:32:18 10.0.0.99 Use the BFG!') { |time, record| + assert_equal({'host' => '10.0.0.99', 'ident' => 'Use', 'message' => 'the BFG!'}, record) + } + end + + test "proper result with no ident message by 'support_colonless_ident false'" do + @parser.configure('parser_type' => 'string', 'support_colonless_ident' => false) + @parser.instance.parse('Feb 5 17:32:18 10.0.0.99 Use the BFG!') { |time, record| + assert_equal({'host' => '10.0.0.99', 'message' => 'Use the BFG!'}, record) + } + end + + data('regexp' => 'regexp', 'string' => 'string') + test "both parsers can't parse broken syslog message" do |param| + @parser.configure('parser_type' => param) + if param == 'string' + @parser.instance.parse("1990 Oct 22 10:52:01 TZ-6 scapegoat.dmz.example.org 10.1.2.32 sched[0]: That's All Folks!") { |time, record| + expected = {'host' => 'scapegoat.dmz.example.org', 'ident' => 'sched', 'pid' => '0', 'message' => "That's All Folks!"} + assert_not_equal(expected, record) + } + else + assert_raise(Fluent::TimeParser::TimeParseError) { + @parser.instance.parse("1990 Oct 22 10:52:01 TZ-6 scapegoat.dmz.example.org 10.1.2.32 sched[0]: That's All Folks!") { |time, record| } + } + end + end + + data('regexp' => 'regexp', 'string' => 'string') + test "':' included message breaks regexp parser" do |param| + @parser.configure('parser_type' => param) + @parser.instance.parse('Aug 10 12:00:00 127.0.0.1 test foo:bar') { |time, record| + expected = {'host' => '127.0.0.1', 'ident' => 'test', 'message' => 'foo:bar'} + if param == 'string' + assert_equal(expected, record) + else + assert_not_equal(expected, record) + end + } + end + end + class TestRFC5424Regexp < self def test_parse_with_rfc5424_message @parser.configure( From 302852e400316a1ba4de3d2adb650db6a2dd9b34 Mon Sep 17 00:00:00 2001 From: Masahiro Nakagawa Date: Wed, 4 Sep 2019 17:09:14 +0900 Subject: [PATCH 3/8] parser_syslog: refactor code Signed-off-by: Masahiro Nakagawa --- lib/fluent/plugin/parser_syslog.rb | 50 +++++++++++++++--------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/lib/fluent/plugin/parser_syslog.rb b/lib/fluent/plugin/parser_syslog.rb index 3b1045e514..45e613e2ad 100644 --- a/lib/fluent/plugin/parser_syslog.rb +++ b/lib/fluent/plugin/parser_syslog.rb @@ -159,12 +159,12 @@ def parse_plain(text, &block) def parse_rfc3164(text, &block) pri = nil - start = 0 + cursor = 0 if @with_priority if text.start_with?(PRI_START_CHAR) i = text.index('>'.freeze, 1) pri = text.slice(1, i - 1).to_i - start = i + 1 + cursor = i + 1 else yield nil, nil return @@ -172,62 +172,62 @@ def parse_rfc3164(text, &block) end # header part - diff = 15 # skip Mmm dd hh:mm:ss - time_end = text[start + diff] + time_diff = 15 # skip Mmm dd hh:mm:ss + time_end = text[cursor + time_diff] if time_end == SPLIT_CHAR - time_str = text.slice(start, diff) - start += 16 # time + ' ' + time_str = text.slice(cursor, time_diff) + cursor += 16 # time + ' ' elsif time_end == '.'.freeze # support subsecond time - i = text.index(SPLIT_CHAR, diff) - time_str = text.slice(start, i - start) - start = i + 1 + i = text.index(SPLIT_CHAR, time_diff) + time_str = text.slice(cursor, i - cursor) + cursor = i + 1 else yield nil, nil return end - i = text.index(SPLIT_CHAR, start) + i = text.index(SPLIT_CHAR, cursor) if i.nil? yield nil, nil return end - diff = i - start - host = text.slice(start, diff) - start += (diff + 1) + host_diff = i - cursor + host = text.slice(cursor, host_diff) + cursor += (host_diff + 1) - i = text.index(SPLIT_CHAR, start) + record = {'host' => host} + record['pri'] = pri if pri + + i = text.index(SPLIT_CHAR, cursor) if i.nil? yield nil, nil return end - diff = i - start - - record = {'host' => host} - record['pri'] = pri if pri + diff = i - cursor # message part msg = if text[i - 1] == ':'.freeze if text[i - 2] == ']'.freeze - j = text.index('['.freeze, start) - record['ident'] = text.slice(start, j - start) + j = text.index('['.freeze, cursor) + record['ident'] = text.slice(cursor, j - cursor) record['pid'] = text.slice(j + 1, i - j - 3) # remove '[' / ']:' else - record['ident'] = text.slice(start, i - start - 1) + record['ident'] = text.slice(cursor, i - cursor - 1) end text.slice(i + 1, text.bytesize) else if @support_colonless_ident if text[i - 1] == ']'.freeze - j = text.index('['.freeze, start) - record['ident'] = text.slice(start, j - start) + j = text.index('['.freeze, cursor) + record['ident'] = text.slice(cursor, j - cursor) record['pid'] = text.slice(j + 1, i - j - 2) # remove '[' / ']' else - record['ident'] = text.slice(start, i - start) + record['ident'] = text.slice(cursor, i - cursor) end text.slice(i + 1, text.bytesize) else - text.slice(i - diff, text.bytesize) + text.slice(cursor, text.bytesize) end end msg.chomp! From 94887d89196829f04d269ac766d2e1c1d87c50fd Mon Sep 17 00:00:00 2001 From: Masahiro Nakagawa Date: Wed, 4 Sep 2019 17:27:11 +0900 Subject: [PATCH 4/8] parser_syslog: Fix only no whitespace content handling Signed-off-by: Masahiro Nakagawa --- lib/fluent/plugin/parser_syslog.rb | 37 +++++++++++++++--------------- test/plugin/test_parser_syslog.rb | 15 ++++++++++++ 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/lib/fluent/plugin/parser_syslog.rb b/lib/fluent/plugin/parser_syslog.rb index 45e613e2ad..27b96da996 100644 --- a/lib/fluent/plugin/parser_syslog.rb +++ b/lib/fluent/plugin/parser_syslog.rb @@ -200,34 +200,33 @@ def parse_rfc3164(text, &block) record['pri'] = pri if pri i = text.index(SPLIT_CHAR, cursor) - if i.nil? - yield nil, nil - return - end - diff = i - cursor # message part - msg = if text[i - 1] == ':'.freeze - if text[i - 2] == ']'.freeze - j = text.index('['.freeze, cursor) - record['ident'] = text.slice(cursor, j - cursor) - record['pid'] = text.slice(j + 1, i - j - 3) # remove '[' / ']:' - else - record['ident'] = text.slice(cursor, i - cursor - 1) - end - text.slice(i + 1, text.bytesize) + msg = if i.nil? # for 'only non-space content case' + text.slice(cursor, text.bytesize) else - if @support_colonless_ident - if text[i - 1] == ']'.freeze + if text[i - 1] == ':'.freeze + if text[i - 2] == ']'.freeze j = text.index('['.freeze, cursor) record['ident'] = text.slice(cursor, j - cursor) - record['pid'] = text.slice(j + 1, i - j - 2) # remove '[' / ']' + record['pid'] = text.slice(j + 1, i - j - 3) # remove '[' / ']:' else - record['ident'] = text.slice(cursor, i - cursor) + record['ident'] = text.slice(cursor, i - cursor - 1) end text.slice(i + 1, text.bytesize) else - text.slice(cursor, text.bytesize) + if @support_colonless_ident + if text[i - 1] == ']'.freeze + j = text.index('['.freeze, cursor) + record['ident'] = text.slice(cursor, j - cursor) + record['pid'] = text.slice(j + 1, i - j - 2) # remove '[' / ']' + else + record['ident'] = text.slice(cursor, i - cursor) + end + text.slice(i + 1, text.bytesize) + else + text.slice(cursor, text.bytesize) + end end end msg.chomp! diff --git a/test/plugin/test_parser_syslog.rb b/test/plugin/test_parser_syslog.rb index e39522cda9..c7eeea156c 100644 --- a/test/plugin/test_parser_syslog.rb +++ b/test/plugin/test_parser_syslog.rb @@ -143,6 +143,21 @@ def test_parse_various_characters_for_tag_with_priority(param) end } end + + data('regexp' => 'regexp', 'string' => 'string') + test "Only no whitespace content in MSG causes different result" do |param| + @parser.configure('parser_type' => param) + @parser.instance.parse('Aug 10 12:00:00 127.0.0.1 value1,value2,value3,value4') { |time, record| + # 'message' is correct but regexp set it as 'ident' + if param == 'string' + expected = {'host' => '127.0.0.1', 'message' => 'value1,value2,value3,value4'} + assert_equal(expected, record) + else + expected = {'host' => '127.0.0.1', 'ident' => 'value1,value2,value3,value4', 'message' => ''} + assert_equal(expected, record) + end + } + end end class TestRFC5424Regexp < self From 41542722d8971f6417255b62cf1d479e90e2bf71 Mon Sep 17 00:00:00 2001 From: Masahiro Nakagawa Date: Wed, 4 Sep 2019 17:29:53 +0900 Subject: [PATCH 5/8] parser_syslog: Separate broken syslog tests Signed-off-by: Masahiro Nakagawa --- test/plugin/test_parser_syslog.rb | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/test/plugin/test_parser_syslog.rb b/test/plugin/test_parser_syslog.rb index c7eeea156c..e0f5b3fdd4 100644 --- a/test/plugin/test_parser_syslog.rb +++ b/test/plugin/test_parser_syslog.rb @@ -116,19 +116,19 @@ def test_parse_various_characters_for_tag_with_priority(param) } end - data('regexp' => 'regexp', 'string' => 'string') - test "both parsers can't parse broken syslog message" do |param| - @parser.configure('parser_type' => param) - if param == 'string' - @parser.instance.parse("1990 Oct 22 10:52:01 TZ-6 scapegoat.dmz.example.org 10.1.2.32 sched[0]: That's All Folks!") { |time, record| - expected = {'host' => 'scapegoat.dmz.example.org', 'ident' => 'sched', 'pid' => '0', 'message' => "That's All Folks!"} - assert_not_equal(expected, record) - } - else - assert_raise(Fluent::TimeParser::TimeParseError) { - @parser.instance.parse("1990 Oct 22 10:52:01 TZ-6 scapegoat.dmz.example.org 10.1.2.32 sched[0]: That's All Folks!") { |time, record| } - } - end + test "string parsers can't parse broken syslog message and generate wrong record" do + @parser.configure('parser_type' => 'string') + @parser.instance.parse("1990 Oct 22 10:52:01 TZ-6 scapegoat.dmz.example.org 10.1.2.32 sched[0]: That's All Folks!") { |time, record| + expected = {'host' => 'scapegoat.dmz.example.org', 'ident' => 'sched', 'pid' => '0', 'message' => "That's All Folks!"} + assert_not_equal(expected, record) + } + end + + test "regexp parsers can't parse broken syslog message and raises an error" do + @parser.configure('parser_type' => 'regexp') + assert_raise(Fluent::TimeParser::TimeParseError) { + @parser.instance.parse("1990 Oct 22 10:52:01 TZ-6 scapegoat.dmz.example.org 10.1.2.32 sched[0]: That's All Folks!") { |time, record| } + } end data('regexp' => 'regexp', 'string' => 'string') From 6fc5fb420ffbb5cc50989fdbf43d553cfe380295 Mon Sep 17 00:00:00 2001 From: Masahiro Nakagawa Date: Thu, 5 Sep 2019 13:46:26 +0900 Subject: [PATCH 6/8] parser_syslog: Remove squeeze from new parser Signed-off-by: Masahiro Nakagawa --- lib/fluent/plugin/parser_syslog.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/fluent/plugin/parser_syslog.rb b/lib/fluent/plugin/parser_syslog.rb index 27b96da996..368ad5181d 100644 --- a/lib/fluent/plugin/parser_syslog.rb +++ b/lib/fluent/plugin/parser_syslog.rb @@ -232,7 +232,7 @@ def parse_rfc3164(text, &block) msg.chomp! record['message'] = msg - time = @time_parser.parse(time_str.squeeze(SPLIT_CHAR)) + time = @time_parser.parse(time_str) record['time'] = time_str if @keep_time_key yield time, record From 2bc4294a01c6354767550325a8049355f78f7885 Mon Sep 17 00:00:00 2001 From: Masahiro Nakagawa Date: Thu, 5 Sep 2019 13:55:15 +0900 Subject: [PATCH 7/8] parser_syslog: Check empty priority case in new parser Signed-off-by: Masahiro Nakagawa --- lib/fluent/plugin/parser_syslog.rb | 7 +++++-- test/plugin/test_parser_syslog.rb | 9 +++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/lib/fluent/plugin/parser_syslog.rb b/lib/fluent/plugin/parser_syslog.rb index 368ad5181d..bd003d1da4 100644 --- a/lib/fluent/plugin/parser_syslog.rb +++ b/lib/fluent/plugin/parser_syslog.rb @@ -155,14 +155,17 @@ def parse_plain(text, &block) end SPLIT_CHAR = ' '.freeze - PRI_START_CHAR = '<'.freeze def parse_rfc3164(text, &block) pri = nil cursor = 0 if @with_priority - if text.start_with?(PRI_START_CHAR) + if text.start_with?('<'.freeze) i = text.index('>'.freeze, 1) + if i < 2 + yield nil, nil + return + end pri = text.slice(1, i - 1).to_i cursor = i + 1 else diff --git a/test/plugin/test_parser_syslog.rb b/test/plugin/test_parser_syslog.rb index e0f5b3fdd4..841e2a3cdd 100644 --- a/test/plugin/test_parser_syslog.rb +++ b/test/plugin/test_parser_syslog.rb @@ -55,6 +55,15 @@ def test_parse_with_priority(param) assert_equal("%b %d %H:%M:%S", @parser.instance.patterns['time_format']) end + data('regexp' => 'regexp', 'string' => 'string') + def test_parse_with_empty_priority(param) + @parser.configure('with_priority' => true, 'parser_type' => param) + @parser.instance.parse('<>Feb 28 12:00:00 192.168.0.1 fluentd[11111]: [error] Syslog test') { |time, record| + assert_nil time + assert_nil record + } + end + data('regexp' => 'regexp', 'string' => 'string') def test_parse_without_colon(param) @parser.configure({'parser_type' => param}) From 60c838f6b34136d6a8aa5ff22eb157f5a8a14513 Mon Sep 17 00:00:00 2001 From: Masahiro Nakagawa Date: Fri, 6 Sep 2019 18:29:51 +0900 Subject: [PATCH 8/8] parser_syslog: Apply review for better variable name Signed-off-by: Masahiro Nakagawa --- lib/fluent/plugin/parser_syslog.rb | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/fluent/plugin/parser_syslog.rb b/lib/fluent/plugin/parser_syslog.rb index bd003d1da4..cd3da593a8 100644 --- a/lib/fluent/plugin/parser_syslog.rb +++ b/lib/fluent/plugin/parser_syslog.rb @@ -175,14 +175,14 @@ def parse_rfc3164(text, &block) end # header part - time_diff = 15 # skip Mmm dd hh:mm:ss - time_end = text[cursor + time_diff] + time_size = 15 # skip Mmm dd hh:mm:ss + time_end = text[cursor + time_size] if time_end == SPLIT_CHAR - time_str = text.slice(cursor, time_diff) + time_str = text.slice(cursor, time_size) cursor += 16 # time + ' ' elsif time_end == '.'.freeze # support subsecond time - i = text.index(SPLIT_CHAR, time_diff) + i = text.index(SPLIT_CHAR, time_size) time_str = text.slice(cursor, i - cursor) cursor = i + 1 else @@ -195,9 +195,9 @@ def parse_rfc3164(text, &block) yield nil, nil return end - host_diff = i - cursor - host = text.slice(cursor, host_diff) - cursor += (host_diff + 1) + host_size = i - cursor + host = text.slice(cursor, host_size) + cursor += host_size + 1 record = {'host' => host} record['pri'] = pri if pri @@ -210,9 +210,9 @@ def parse_rfc3164(text, &block) else if text[i - 1] == ':'.freeze if text[i - 2] == ']'.freeze - j = text.index('['.freeze, cursor) - record['ident'] = text.slice(cursor, j - cursor) - record['pid'] = text.slice(j + 1, i - j - 3) # remove '[' / ']:' + left_braket_pos = text.index('['.freeze, cursor) + record['ident'] = text.slice(cursor, left_braket_pos - cursor) + record['pid'] = text.slice(left_braket_pos + 1, i - left_braket_pos - 3) # remove '[' / ']:' else record['ident'] = text.slice(cursor, i - cursor - 1) end @@ -220,9 +220,9 @@ def parse_rfc3164(text, &block) else if @support_colonless_ident if text[i - 1] == ']'.freeze - j = text.index('['.freeze, cursor) - record['ident'] = text.slice(cursor, j - cursor) - record['pid'] = text.slice(j + 1, i - j - 2) # remove '[' / ']' + left_braket_pos = text.index('['.freeze, cursor) + record['ident'] = text.slice(cursor, left_braket_pos - cursor) + record['pid'] = text.slice(left_braket_pos + 1, i - left_braket_pos - 2) # remove '[' / ']' else record['ident'] = text.slice(cursor, i - cursor) end