Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

syslog_parser: Add string parser for rfc5424 #3015

Merged
merged 1 commit into from
Jun 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 143 additions & 3 deletions lib/fluent/plugin/parser_syslog.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def initialize
@space_count = nil
@space_count_rfc5424 = nil
@skip_space_count = false
@skip_space_count_rfc5424 = false
end

def configure(conf)
Expand All @@ -88,18 +89,26 @@ class << self
end
RFC3164_WITHOUT_TIME_AND_PRI_REGEXP
when :rfc5424
class << self
alias_method :parse, :parse_rfc5424_regex
if @regexp_parser
class << self
alias_method :parse, :parse_rfc5424_regex
end
else
class << self
alias_method :parse, :parse_rfc5424
end
end
@time_format = @rfc5424_time_format unless conf.has_key?('time_format')
@support_rfc5424_without_subseconds = true
@skip_space_count_rfc5424 = @time_format.count(' ').zero?
RFC5424_WITHOUT_TIME_AND_PRI_REGEXP
when :auto
class << self
alias_method :parse, :parse_auto
end
@time_parser_rfc3164 = time_parser_create(format: @time_format)
@time_parser_rfc5424 = time_parser_create(format: @rfc5424_time_format)
@skip_space_count_rfc5424 = @rfc5424_time_format.count(' ').zero?
nil
end

Expand Down Expand Up @@ -127,7 +136,11 @@ def parse_auto(text, &block)
@regexp = RFC5424_WITHOUT_TIME_AND_PRI_REGEXP
@time_parser = @time_parser_rfc5424
@support_rfc5424_without_subseconds = true
parse_rfc5424_regex(text, &block)
if @regexp_parser
parse_rfc5424_regex(text, &block)
else
parse_rfc5424(text, &block)
end
else
@regexp = RFC3164_WITHOUT_TIME_AND_PRI_REGEXP
@time_parser = @time_parser_rfc3164
Expand Down Expand Up @@ -350,6 +363,133 @@ def parse_rfc3164(text, &block)

yield time, record
end

# Placeholder that RFC 5424 uses for an absent field.
NILVALUE = '-'.freeze

# Parses one RFC 5424 syslog line by scanning for separators with
# String#index instead of matching a regular expression.
#
# Fields are consumed left to right: optional "<pri>version", timestamp,
# host, ident, pid, msgid, structured data ("extradata") and the optional
# free-form message.
#
# Reads @with_priority, @skip_space_count_rfc5424, @space_count,
# @time_parser, @support_rfc5424_without_subseconds,
# @time_parser_rfc5424_without_subseconds and @keep_time_key.
#
# @param text [String] the raw syslog line
# @yield [time, record] the parsed time and the field hash; +nil, nil+ is
#   yielded when a header field cannot be located in +text+
# @raise [Fluent::TimeParser::TimeParseError] when the timestamp cannot be
#   parsed, including by the without-subseconds fallback when enabled
def parse_rfc5424(text, &block)
  pri = nil
  cursor = 0
  if @with_priority
    unless text.start_with?('<'.freeze)
      yield nil, nil
      return
    end

    i = text.index('>'.freeze, 1)
    # No closing '>' (nil) or an empty "<>" (i == 1) is not a priority.
    if i.nil? || i < 2
      yield nil, nil
      return
    end
    pri = text.slice(1, i - 1).to_i

    # Skip whatever sits between '>' and the first separator (the version).
    i = text.index(SPLIT_CHAR, i)
    unless i
      yield nil, nil
      return
    end
    cursor = i + 1
  end

  # timestamp part
  if @skip_space_count_rfc5424
    # Time format has no spaces: the timestamp is a single token.
    i = text.index(SPLIT_CHAR, cursor)
    unless i
      yield nil, nil
      return
    end
    time_str = text.slice(cursor, i - cursor)
    cursor = i + 1
  else
    # Time format spans several space-separated tokens; runs of spaces
    # between tokens are tolerated and squeezed to one afterwards.
    # NOTE(review): this reads @space_count although initialize also
    # declares @space_count_rfc5424 - confirm which one configure sets
    # for this code path.
    i = cursor - 1
    sq = false
    @space_count.times do
      while text[i + 1] == SPLIT_CHAR
        sq = true
        i += 1
      end
      i = text.index(SPLIT_CHAR, i + 1)
      break unless i
    end
    unless i
      yield nil, nil
      return
    end

    time_str = text.slice(cursor, i - cursor)
    time_str = time_str.squeeze(SPLIT_CHAR) if sq
    cursor = i + 1
  end

  # The four header fields below deliberately repeat the same code
  # instead of sharing a helper, for performance.

  # host part
  i = text.index(SPLIT_CHAR, cursor)
  unless i
    yield nil, nil
    return
  end
  slice_size = i - cursor
  host = text.slice(cursor, slice_size)
  cursor += slice_size + 1

  # ident part
  i = text.index(SPLIT_CHAR, cursor)
  unless i
    yield nil, nil
    return
  end
  slice_size = i - cursor
  ident = text.slice(cursor, slice_size)
  cursor += slice_size + 1

  # pid part
  i = text.index(SPLIT_CHAR, cursor)
  unless i
    yield nil, nil
    return
  end
  slice_size = i - cursor
  pid = text.slice(cursor, slice_size)
  cursor += slice_size + 1

  # msgid part
  i = text.index(SPLIT_CHAR, cursor)
  unless i
    yield nil, nil
    return
  end
  slice_size = i - cursor
  msgid = text.slice(cursor, slice_size)
  cursor += slice_size + 1

  record = {'host' => host, 'ident' => ident, 'pid' => pid, 'msgid' => msgid}
  record['pri'] = pri if pri

  # extradata part
  ed_start = text[cursor]
  if ed_start == NILVALUE
    record['extradata'] = NILVALUE
    cursor += 1
  else
    start = cursor
    # "] " marks the end of the structured data when a message follows.
    i = text.index('] '.freeze, cursor)
    extradata = if i
                  diff = i + 1 - start # calculate ']' position
                  cursor += diff
                  text.slice(start, diff)
                else # No message part case
                  cursor = text.bytesize
                  text.slice(start, cursor)
                end
    extradata.tr!("\\".freeze, ''.freeze)
    record['extradata'] = extradata
  end

  # message part
  if cursor != text.bytesize
    # cursor is a character index while bytesize counts bytes, so with
    # multibyte input this branch can be entered with nothing left to
    # read; the slice is then nil and there is no message.
    msg = text[cursor + 1..-1]
    if msg
      msg.chomp!
      record['message'] = msg
    end
  end

  time = begin
           @time_parser.parse(time_str)
         rescue Fluent::TimeParser::TimeParseError
           if @support_rfc5424_without_subseconds
             @time_parser_rfc5424_without_subseconds.parse(time_str)
           else
             raise
           end
         end
  record['time'] = time_str if @keep_time_key

  yield time, record
end
end
end
end
75 changes: 62 additions & 13 deletions test/plugin/test_parser_syslog.rb
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,13 @@ def test_parse_various_characters_for_tag_with_priority(param)
end

class TestRFC5424Regexp < self
def test_parse_with_rfc5424_message
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_message(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -216,11 +218,13 @@ def test_parse_with_rfc5424_message
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

def test_parse_with_rfc5424_message_trailing_eol
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_message_trailing_eol(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = "<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!\n"
@parser.instance.parse(text) do |time, record|
Expand All @@ -233,11 +237,13 @@ def test_parse_with_rfc5424_message_trailing_eol
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

def test_parse_with_rfc5424_multiline_message
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_multiline_message(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = "<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi,\nfrom\nFluentd!"
@parser.instance.parse(text) do |time, record|
Expand All @@ -250,10 +256,12 @@ def test_parse_with_rfc5424_multiline_message
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

def test_parse_with_rfc5424_message_and_without_priority
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_message_and_without_priority(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'parser_type' => param
)
text = '2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -266,10 +274,12 @@ def test_parse_with_rfc5424_message_and_without_priority
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

def test_parse_with_rfc5424_empty_message_and_without_priority
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_empty_message_and_without_priority(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'parser_type' => param
)
text = '2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - -'
@parser.instance.parse(text) do |time, record|
Expand All @@ -282,10 +292,12 @@ def test_parse_with_rfc5424_empty_message_and_without_priority
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

def test_parse_with_rfc5424_message_without_time_format
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_message_without_time_format(param)
@parser.configure(
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -297,10 +309,12 @@ def test_parse_with_rfc5424_message_without_time_format
end
end

def test_parse_with_rfc5424_message_with_priority_and_pid
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_message_with_priority_and_pid(param)
@parser.configure(
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<28>1 2018-09-26T15:54:26.620412+09:00 machine minissdpd 1298 - - peer 192.168.0.5:50123 is not from a LAN'
@parser.instance.parse(text) do |time, record|
Expand All @@ -312,11 +326,13 @@ def test_parse_with_rfc5424_message_with_priority_and_pid
end
end

def test_parse_with_rfc5424_structured_message
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_structured_message(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"] [Hi] from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -329,11 +345,13 @@ def test_parse_with_rfc5424_structured_message
end
end

def test_parse_with_rfc5424_multiple_structured_message
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_multiple_structured_message(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"][exampleSDID@20224 class="high"] Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -346,11 +364,13 @@ def test_parse_with_rfc5424_multiple_structured_message
end
end

def test_parse_with_rfc5424_message_includes_right_bracket
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_message_includes_right_bracket(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"] [Hi] from Fluentd]!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -363,11 +383,13 @@ def test_parse_with_rfc5424_message_includes_right_bracket
end
end

def test_parse_with_rfc5424_empty_message
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_empty_message(param)
@parser.configure(
'time_format' => '%Y-%m-%dT%H:%M:%S.%L%z',
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"]'
@parser.instance.parse(text) do |time, record|
Expand All @@ -380,10 +402,35 @@ def test_parse_with_rfc5424_empty_message
end
end

def test_parse_with_rfc5424_message_without_subseconds
# A line whose structured data is followed by a single trailing space:
# the 'string' parser type is expected to yield an empty message, while
# any other parser type is expected to yield nil for both time and record.
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_space_empty_message(param)
  @parser.configure(
    'message_format' => 'rfc5424',
    'with_priority' => true,
    'parser_type' => param
  )
  text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"] '
  @parser.instance.parse(text) do |time, record|
    case param
    when 'string'
      assert_equal(event_time("2017-02-06T13:14:15.003Z", format: '%Y-%m-%dT%H:%M:%S.%L%z'), time)
      assert_equal "11111", record["pid"]
      assert_equal "ID24224", record["msgid"]
      assert_equal "[exampleSDID@20224 iut=\"3\" eventSource=\"Application\" eventID=\"11211\"]",
                   record["extradata"]
      assert_equal '', record["message"]
    else
      assert_nil time
      assert_nil record
    end
  end
end

data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_message_without_subseconds(param)
@parser.configure(
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<16>1 2017-02-06T13:14:15Z 192.168.0.1 fluentd - - - Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -395,10 +442,12 @@ def test_parse_with_rfc5424_message_without_subseconds
end
end

def test_parse_with_rfc5424_message_both_timestamp
data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_rfc5424_message_both_timestamp(param)
@parser.configure(
'message_format' => 'rfc5424',
'with_priority' => true,
'parser_type' => param
)
text = '<16>1 2017-02-06T13:14:15Z 192.168.0.1 fluentd - - - Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand Down