Skip to content

Commit

Permalink
Merge pull request #3019 from fluent/improve-syslog-parser
Browse files Browse the repository at this point in the history
Improve syslog parser
  • Loading branch information
repeatedly authored Jun 3, 2020
2 parents 1813f3b + 5d60ada commit db98bc1
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 71 deletions.
115 changes: 58 additions & 57 deletions lib/fluent/plugin/parser_syslog.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,18 +63,22 @@ class SyslogParser < Parser
# Set every piece of parser state to an "unconfigured" default.
# The regexps, time parsers and space counters start as nil and the
# skip flags start as false; configure assigns the real values.
def initialize
super
# Guards time parser calls in the regexp-based parse methods.
@mutex = Mutex.new
@space_count = nil
@regexp = nil
# Per-format regexps handed to parse_plain by the regexp-based parsers.
@regexp3164 = nil
@regexp5424 = nil
# Assigned in configure from `@parser_type == :regexp`.
@regexp_parser = nil
@time_parser_rfc3164 = nil
@time_parser_rfc5424 = nil
# Number of space-delimited fields that make up the timestamp, per format.
@space_count_rfc3164 = nil
@space_count_rfc5424 = nil
# When true, the string parsers take a shortcut instead of counting spaces.
@skip_space_count = false
@skip_space_count_rfc3164 = false
@skip_space_count_rfc5424 = false
# Fallback parser tried when the main RFC5424 time parser raises TimeParseError.
@time_parser_rfc5424_without_subseconds = nil
end

def configure(conf)
super

@time_parser_rfc3164 = @time_parser_rfc5424 = nil
@time_parser_rfc5424_without_subseconds = nil
@support_rfc5424_without_subseconds = false
@regexp_parser = @parser_type == :regexp
@regexp = case @message_format
when :rfc3164
Expand All @@ -87,6 +91,7 @@ class << self
alias_method :parse, :parse_rfc3164
end
end
setup_time_parser_3164(@time_format)
RFC3164_WITHOUT_TIME_AND_PRI_REGEXP
when :rfc5424
if @regexp_parser
Expand All @@ -99,27 +104,36 @@ class << self
end
end
@time_format = @rfc5424_time_format unless conf.has_key?('time_format')
@support_rfc5424_without_subseconds = true
@skip_space_count_rfc5424 = @time_format.count(' ').zero?
setup_time_parser_5424(@time_format)
RFC5424_WITHOUT_TIME_AND_PRI_REGEXP
when :auto
class << self
alias_method :parse, :parse_auto
end
@time_parser_rfc3164 = time_parser_create(format: @time_format)
@time_parser_rfc5424 = time_parser_create(format: @rfc5424_time_format)
@skip_space_count_rfc5424 = @rfc5424_time_format.count(' ').zero?
setup_time_parser_3164(@time_format)
setup_time_parser_5424(@rfc5424_time_format)
nil
end

@space_count = @time_format.squeeze(' ').count(' ') + 1
@space_count_rfc5424 = @rfc5424_time_format.squeeze(' ').count(' ') + 1
@time_parser = time_parser_create
@time_parser_rfc5424_without_subseconds = time_parser_create(format: "%Y-%m-%dT%H:%M:%S%z")
if @regexp_parser
@regexp3164 = RFC3164_WITHOUT_TIME_AND_PRI_REGEXP
@regexp5424 = RFC5424_WITHOUT_TIME_AND_PRI_REGEXP
end
end

if ['%b %d %H:%M:%S', '%b %d %H:%M:%S.%N'].include?(@time_format)
@skip_space_count = true
# Build the RFC3164 time parser and the bookkeeping that the parse methods
# use to locate the timestamp inside a message.
#
# @param time_fmt [String] time format of the RFC3164 timestamp
# @return [Integer] number of space-separated fields in +time_fmt+
def setup_time_parser_3164(time_fmt)
  @time_parser_rfc3164 = time_parser_create(format: time_fmt)
  # Assign the flag unconditionally (as setup_time_parser_5424 does) so a
  # previous call with one of the fixed-width formats cannot leave the
  # shortcut enabled for a different format.
  @skip_space_count_rfc3164 = ['%b %d %H:%M:%S', '%b %d %H:%M:%S.%N'].include?(time_fmt)
  # Runs of spaces count once: the parsers also collapse repeated spaces in
  # the message before handing the timestamp to the time parser.
  @space_count_rfc3164 = time_fmt.squeeze(' ').count(' ') + 1
end

# Build both RFC5424 time parsers and record how the timestamp is laid out.
#
# @param time_fmt [String] time format of the RFC5424 timestamp
# @return [Integer] number of space-separated fields in +time_fmt+
def setup_time_parser_5424(time_fmt)
  # Primary parser, plus the fallback the parse methods try when the
  # primary one raises TimeParseError.
  fallback_fmt = "%Y-%m-%dT%H:%M:%S%z"
  @time_parser_rfc5424 = time_parser_create(format: time_fmt)
  @time_parser_rfc5424_without_subseconds = time_parser_create(format: fallback_fmt)

  # A format with no spaces at all lets the parsers skip space counting.
  has_no_space = time_fmt.count(' ') == 0
  @skip_space_count_rfc5424 = has_no_space

  # Runs of consecutive spaces are treated as a single separator.
  separator_runs = time_fmt.squeeze(' ').count(' ')
  @space_count_rfc5424 = separator_runs + 1
end

# this method is for tests
Expand All @@ -132,18 +146,13 @@ def parse(text)
end

def parse_auto(text, &block)
if REGEXP_DETECT_RFC5424.match(text)
@regexp = RFC5424_WITHOUT_TIME_AND_PRI_REGEXP
@time_parser = @time_parser_rfc5424
@support_rfc5424_without_subseconds = true
if REGEXP_DETECT_RFC5424.match?(text)
if @regexp_parser
parse_rfc5424_regex(text, &block)
else
parse_rfc5424(text, &block)
end
else
@regexp = RFC3164_WITHOUT_TIME_AND_PRI_REGEXP
@time_parser = @time_parser_rfc3164
if @regexp_parser
parse_rfc3164_regex(text, &block)
else
Expand All @@ -152,6 +161,8 @@ def parse_auto(text, &block)
end
end

SPLIT_CHAR = ' '.freeze

def parse_rfc3164_regex(text, &block)
idx = 0
record = {}
Expand All @@ -169,22 +180,22 @@ def parse_rfc3164_regex(text, &block)

i = idx - 1
sq = false
@space_count.times do
while text[i + 1] == ' '.freeze
@space_count_rfc3164.times do
while text[i + 1] == SPLIT_CHAR
sq = true
i += 1
end

i = text.index(' '.freeze, i + 1)
i = text.index(SPLIT_CHAR, i + 1)
end

time_str = sq ? text.slice(idx, i - idx).squeeze(' ') : text.slice(idx, i - idx)
time = @mutex.synchronize { @time_parser.parse(time_str) }
time_str = sq ? text.slice(idx, i - idx).squeeze(SPLIT_CHAR) : text.slice(idx, i - idx)
time = @mutex.synchronize { @time_parser_rfc3164.parse(time_str) }
if @keep_time_key
record['time'] = time_str
end

parse_plain(time, text, i + 1, record, RFC3164_CAPTURES, &block)
parse_plain(@regexp3164, time, text, i + 1, record, RFC3164_CAPTURES, &block)
end

def parse_rfc5424_regex(text, &block)
Expand All @@ -204,40 +215,36 @@ def parse_rfc5424_regex(text, &block)
i = idx - 1
sq = false
@space_count_rfc5424.times {
while text[i + 1] == ' '.freeze
while text[i + 1] == SPLIT_CHAR
sq = true
i += 1
end

i = text.index(' '.freeze, i + 1)
i = text.index(SPLIT_CHAR, i + 1)
}

time_str = sq ? text.slice(idx, i - idx).squeeze(' '.freeze) : text.slice(idx, i - idx)
time_str = sq ? text.slice(idx, i - idx).squeeze(SPLIT_CHAR) : text.slice(idx, i - idx)
time = @mutex.synchronize do
begin
@time_parser.parse(time_str)
@time_parser_rfc5424.parse(time_str)
rescue Fluent::TimeParser::TimeParseError => e
if @support_rfc5424_without_subseconds
log.trace(e)
@time_parser_rfc5424_without_subseconds.parse(time_str)
else
raise
end
log.trace(e)
@time_parser_rfc5424_without_subseconds.parse(time_str)
end
end

if @keep_time_key
record['time'] = time_str
end
parse_plain(time, text, i + 1, record, RFC5424_CAPTURES, &block)
parse_plain(@regexp5424, time, text, i + 1, record, RFC5424_CAPTURES, &block)
end

# @param time [EventTime]
# @param idx [Integer] note: this argument is needed to avoid string creation
# @param record [Hash]
# @param capture_list [Array] for performance
def parse_plain(time, text, idx, record, capture_list, &block)
m = @regexp.match(text, idx)
def parse_plain(re, time, text, idx, record, capture_list, &block)
m = re.match(text, idx)
if m.nil?
yield nil, nil
return
Expand All @@ -262,8 +269,6 @@ def parse_plain(time, text, idx, record, capture_list, &block)
yield time, record
end

SPLIT_CHAR = ' '.freeze

def parse_rfc3164(text, &block)
pri = nil
cursor = 0
Expand All @@ -282,7 +287,7 @@ def parse_rfc3164(text, &block)
end
end

if @skip_space_count
if @skip_space_count_rfc3164
# header part
time_size = 15 # skip Mmm dd hh:mm:ss
time_end = text[cursor + time_size]
Expand All @@ -301,15 +306,15 @@ def parse_rfc3164(text, &block)
else
i = cursor - 1
sq = false
@space_count.times do
while text[i + 1] == ' '.freeze
@space_count_rfc3164.times do
while text[i + 1] == SPLIT_CHAR
sq = true
i += 1
end
i = text.index(' '.freeze, i + 1)
i = text.index(SPLIT_CHAR, i + 1)
end

time_str = sq ? text.slice(idx, i - cursor).squeeze(' '.freeze) : text.slice(cursor, i - cursor)
time_str = sq ? text.slice(idx, i - cursor).squeeze(SPLIT_CHAR) : text.slice(cursor, i - cursor)
cursor = i + 1
end

Expand Down Expand Up @@ -358,7 +363,7 @@ def parse_rfc3164(text, &block)
msg.chomp!
record['message'] = msg

time = @time_parser.parse(time_str)
time = @time_parser_rfc3164.parse(time_str)
record['time'] = time_str if @keep_time_key

yield time, record
Expand Down Expand Up @@ -393,7 +398,7 @@ def parse_rfc5424(text, &block)
else
i = cursor - 1
sq = false
@space_count.times do
@space_count_rfc5424.times do
while text[i + 1] == SPLIT_CHAR
sq = true
i += 1
Expand Down Expand Up @@ -472,19 +477,15 @@ def parse_rfc5424(text, &block)

# message part
if cursor != text.bytesize
msg = text[cursor + 1..-1]
msg = text.slice(cursor + 1, text.bytesize)
msg.chomp!
record['message'] = msg
end

time = begin
@time_parser.parse(time_str)
@time_parser_rfc5424.parse(time_str)
rescue Fluent::TimeParser::TimeParseError => e
if @support_rfc5424_without_subseconds
@time_parser_rfc5424_without_subseconds.parse(time_str)
else
raise
end
@time_parser_rfc5424_without_subseconds.parse(time_str)
end
record['time'] = time_str if @keep_time_key

Expand Down
15 changes: 1 addition & 14 deletions test/plugin/test_parser_syslog.rb
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,6 @@ def test_auto_with_legacy_syslog_message(param)
assert_equal(event_time("Feb 28 00:00:12", format: '%b %d %M:%S:%H'), time)
assert_equal(@expected, record)
end
assert_equal(Fluent::Plugin::SyslogParser::RFC3164_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

data('regexp' => 'regexp', 'string' => 'string')
Expand All @@ -497,7 +496,6 @@ def test_auto_with_legacy_syslog_priority_message(param)
assert_equal(event_time("Feb 28 12:00:00", format: '%b %d %M:%S:%H'), time)
assert_equal(@expected.merge('pri' => 6), record)
end
assert_equal(Fluent::Plugin::SyslogParser::RFC3164_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

data('regexp' => 'regexp', 'string' => 'string')
Expand All @@ -517,7 +515,6 @@ def test_parse_with_rfc5424_message(param)
assert_equal 16, record["pri"]
assert_equal "Hi, from Fluentd!", record["message"]
end
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

data('regexp' => 'regexp', 'string' => 'string')
Expand All @@ -537,7 +534,7 @@ def test_parse_with_rfc5424_structured_message(param)
record["extradata"]
assert_equal "Hi, from Fluentd!", record["message"]
end
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])end
end

data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_both_message_type(param)
Expand All @@ -553,7 +550,6 @@ def test_parse_with_both_message_type(param)
assert_equal(event_time("Feb 28 12:00:00", format: '%b %d %M:%S:%H'), time)
assert_equal(@expected.merge('pri' => 1), record)
end
assert_equal(Fluent::Plugin::SyslogParser::RFC3164_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])

text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"] Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -564,21 +560,18 @@ def test_parse_with_both_message_type(param)
record["extradata"]
assert_equal "Hi, from Fluentd!", record["message"]
end
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])

text = '<1>Feb 28 12:00:02 192.168.0.1 fluentd[11111]: [error] Syslog test 2>1'
@parser.instance.parse(text) do |time, record|
assert_equal(event_time("Feb 28 12:00:02", format: '%b %d %M:%S:%H'), time)
assert_equal(@expected.merge('pri' => 1, 'message'=> '[error] Syslog test 2>1'), record)
end
assert_equal(Fluent::Plugin::SyslogParser::RFC3164_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])

text = '<1>Feb 28 12:00:02 192.168.0.1 fluentd[11111]: [error] Syslog test'
@parser.instance.parse(text) do |time, record|
assert_equal(event_time("Feb 28 12:00:02", format: '%b %d %M:%S:%H'), time)
assert_equal(@expected.merge('pri' => 1), record)
end
assert_equal(Fluent::Plugin::SyslogParser::RFC3164_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])

text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -588,7 +581,6 @@ def test_parse_with_both_message_type(param)
assert_equal "-", record["extradata"]
assert_equal "Hi, from Fluentd!", record["message"]
end
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

data('regexp' => 'regexp', 'string' => 'string')
Expand All @@ -605,7 +597,6 @@ def test_parse_with_both_message_type_and_priority(param)
assert_equal(event_time("Feb 28 12:00:00", format: '%b %d %M:%S:%H'), time)
assert_equal(@expected.merge('pri' => 6), record)
end
assert_equal(Fluent::Plugin::SyslogParser::RFC3164_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])

text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"] Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -616,14 +607,12 @@ def test_parse_with_both_message_type_and_priority(param)
record["extradata"]
assert_equal "Hi, from Fluentd!", record["message"]
end
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])

text = '<16>Feb 28 12:00:02 192.168.0.1 fluentd[11111]: [error] Syslog test'
@parser.instance.parse(text) do |time, record|
assert_equal(event_time("Feb 28 12:00:02", format: '%b %d %M:%S:%H'), time)
assert_equal(@expected.merge('pri' => 16), record)
end
assert_equal(Fluent::Plugin::SyslogParser::RFC3164_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])

text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -633,7 +622,6 @@ def test_parse_with_both_message_type_and_priority(param)
assert_equal "-", record["extradata"]
assert_equal "Hi, from Fluentd!", record["message"]
end
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])

text = '<16>1 2017-02-06T13:14:15Z 192.168.0.1 fluentd - - - Hi, from Fluentd without subseconds!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -643,7 +631,6 @@ def test_parse_with_both_message_type_and_priority(param)
assert_equal "-", record["extradata"]
assert_equal "Hi, from Fluentd without subseconds!", record["message"]
end
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end
end
end

0 comments on commit db98bc1

Please sign in to comment.