Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Syslog parser works with any time format #2886

Merged
merged 12 commits into from
Mar 24, 2020
147 changes: 114 additions & 33 deletions lib/fluent/plugin/parser_syslog.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ module Plugin
class SyslogParser < Parser
Plugin.register_parser('syslog', self)

# TODO: Remove these regexps since they are no longer needed; keep them for now for compatibility
# From the existing TextParser pattern
REGEXP = /^(?<time>[^ ]*\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[^ :\[]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$/
# From in_syslog default pattern
Expand All @@ -36,8 +37,17 @@ class SyslogParser < Parser
REGEXP_RFC5424_WITH_PRI = Regexp.new(<<~'EOS'.chomp % REGEXP_RFC5424, Regexp::MULTILINE)
\A<(?<pri>[0-9]{1,3})\>[1-9]\d{0,2} %s\z
EOS

# Matches a leading "<", 1-3 digits, ">" followed directly by a 1-3 digit number
# that does not start with 0. parse_auto tests incoming text against it to pick
# the RFC 5424 code path.
REGEXP_DETECT_RFC5424 = /^\<[0-9]{1,3}\>[1-9]\d{0,2}/

# Captures host, ident, an optional numeric pid in square brackets and the
# message. As the name says, it carries no time or priority part;
# parse_rfc3164_regex extracts those itself and passes the offset after the time.
RFC3164_WITHOUT_TIME_AND_PRI_REGEXP = /(?<host>[^ ]*) (?<ident>[^ :\[]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$/
# Frozen list of the capture names above, handed to parse_plain as capture_list.
RFC3164_CAPTURES = RFC3164_WITHOUT_TIME_AND_PRI_REGEXP.names.freeze
# Matches a "<pri>" prefix (1-3 digits) at the start of a line; checked by
# parse_rfc3164_regex when @with_priority is set.
RFC3164_PRI_REGEXP = /^<(?<pri>[0-9]{1,3})>/

# Captures host, ident, pid and msgid as length-limited runs of printable ASCII
# ([!-~]), then extradata (either "-" or one or more [...] groups whose closing
# bracket is not preceded by a backslash) and an optional message. Anchored with
# \z; the /m flag lets "." match newlines.
RFC5424_WITHOUT_TIME_AND_PRI_REGEXP = /(?<host>[!-~]{1,255}) (?<ident>[!-~]{1,48}) (?<pid>[!-~]{1,128}) (?<msgid>[!-~]{1,32}) (?<extradata>(?:\-|(?:\[.*?(?<!\\)\])+))(?: (?<message>.+))?\z/m
# Frozen list of the capture names above, handed to parse_plain as capture_list.
RFC5424_CAPTURES = RFC5424_WITHOUT_TIME_AND_PRI_REGEXP.names.freeze
# Matches "<pri>" (1-3 digits), a 1-3 digit version and one whitespace character
# at the start of a line; parse_rfc5424_regex reads 'pri' and 'version' from it
# and continues parsing at the end of the match.
RFC5424_PRI_REGEXP = /^<(?<pri>\d{1,3})>(?<version>\d\d{0,2})\s/
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe, version is not needed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed 8c6b8bd


# Default time format. The :rfc5424 branch of #configure replaces it with
# @rfc5424_time_format unless the configuration sets 'time_format' itself.
config_set_default :time_format, "%b %d %H:%M:%S"
# When with_priority is true, the parse_*_regex methods expect a leading "<pri>"
# prefix and yield (nil, nil) if the text does not start with one.
desc 'If the incoming logs have priority prefix, e.g. <9>, set true'
config_param :with_priority, :bool, default: false
Expand All @@ -53,6 +63,8 @@ class SyslogParser < Parser
# Sets up the parser's initial state after the superclass initializer has run.
def initialize
  super
  # Both space counts start out unset; #configure assigns them from the
  # configured time formats.
  @space_count = @space_count_rfc5424 = nil
  # Lock taken around time parser calls in the parse_* methods.
  @mutex = Mutex.new
end

def configure(conf)
Expand All @@ -66,21 +78,21 @@ def configure(conf)
when :rfc3164
if @regexp_parser
class << self
alias_method :parse, :parse_plain
alias_method :parse, :parse_rfc3164_regex
end
else
class << self
alias_method :parse, :parse_rfc3164
end
end
@with_priority ? REGEXP_WITH_PRI : REGEXP
RFC3164_WITHOUT_TIME_AND_PRI_REGEXP
when :rfc5424
class << self
alias_method :parse, :parse_plain
alias_method :parse, :parse_rfc5424_regex
end
@time_format = @rfc5424_time_format unless conf.has_key?('time_format')
@support_rfc5424_without_subseconds = true
@with_priority ? REGEXP_RFC5424_WITH_PRI : REGEXP_RFC5424_NO_PRI
RFC5424_WITHOUT_TIME_AND_PRI_REGEXP
when :auto
class << self
alias_method :parse, :parse_auto
Expand All @@ -89,10 +101,14 @@ class << self
@time_parser_rfc5424 = time_parser_create(format: @rfc5424_time_format)
nil
end

@space_count = @time_format.squeeze(' ').count(' ') + 1
@space_count_rfc5424 = @rfc5424_time_format.squeeze(' ').count(' ') + 1
@time_parser = time_parser_create
@time_parser_rfc5424_without_subseconds = time_parser_create(format: "%Y-%m-%dT%H:%M:%S%z")
end

# Test helper: exposes the regexp and time format currently held by the parser.
#
# @return [Hash] 'format' => @regexp, 'time_format' => @time_format
def patterns
  current = {}
  current['format'] = @regexp
  current['time_format'] = @time_format
  current
end
Expand All @@ -103,51 +119,116 @@ def parse(text)

# Parses `text` by first testing it against REGEXP_DETECT_RFC5424 and then
# delegating to the RFC 5424 or RFC 3164 code path, forwarding the caller's block.
# On a match it switches @time_parser to @time_parser_rfc5424 and sets
# @support_rfc5424_without_subseconds; otherwise it switches to
# @time_parser_rfc3164 and lets @regexp_parser choose between the regexp-based
# and the non-regexp RFC 3164 parser.
# Note that @regexp and @time_parser are reassigned on every call.
# NOTE(review): this listing appears to come from a diff rendered without +/-
# markers, so each pair of back-to-back @regexp assignments and parse_* calls is
# presumably the old line followed by its replacement. Confirm against the
# merged file.
#
# @param text [String] raw syslog line
def parse_auto(text, &block)
if REGEXP_DETECT_RFC5424.match(text)
@regexp = @with_priority ? REGEXP_RFC5424_WITH_PRI : REGEXP_RFC5424_NO_PRI
@regexp = RFC5424_WITHOUT_TIME_AND_PRI_REGEXP
@time_parser = @time_parser_rfc5424
@support_rfc5424_without_subseconds = true
parse_plain(text, &block)
parse_rfc5424_regex(text, &block)
else
@regexp = @with_priority ? REGEXP_WITH_PRI : REGEXP
@regexp = RFC3164_WITHOUT_TIME_AND_PRI_REGEXP
@time_parser = @time_parser_rfc3164
if @regexp_parser
parse_plain(text, &block)
parse_rfc3164_regex(text, &block)
else
parse_rfc3164(text, &block)
end
end
end

def parse_plain(text, &block)
m = @regexp.match(text)
unless m
yield nil, nil
return
def parse_rfc3164_regex(text, &block)
idx = 0
record = {}

if @with_priority
if RFC3164_PRI_REGEXP.match?(text)
v = text.index('>')
record['pri'] = text[1..v].to_i # trim `<` and ``>
idx = v + 1
else
yield(nil, nil)
return
end
end

time = nil
i = idx - 1
sq = false
@space_count.times do
while text[i + 1] == ' '.freeze
sq = true
i += 1
end

i = text.index(' '.freeze, i + 1)
end

time_str = sq ? text.slice(idx, i - idx).squeeze(' ') : text.slice(idx, i - idx)
time = @mutex.synchronize { @time_parser.parse(time_str) }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can remove this mutex.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this code can cause a race condition. Or did I miss something?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I missed it. I forgot to improve this point, e.g. the mutex should be moved to TimeParser and the mutex feature should be disabled for the popular one-thread use case.

if @keep_time_key
record['time'] = time_str
end

parse_plain(time, text, i + 1, record, RFC3164_CAPTURES, &block)
end

def parse_rfc5424_regex(text, &block)
idx = 0
record = {}

m.names.each { |name|
if value = m[name]
if @with_priority
if (m = RFC5424_PRI_REGEXP.match(text))
record['pri'] = m['pri']
record['version'] = m['version']
idx = m.end(0)
else
yield(nil, nil)
return
end
end

i = idx - 1
sq = false
@space_count_rfc5424.times {
while text[i + 1] == ' '.freeze
sq = true
i += 1
end

i = text.index(' '.freeze, i + 1)
}

time_str = sq ? text.slice(idx, i - idx).squeeze(' '.freeze) : text.slice(idx, i - idx)
time = @mutex.synchronize do
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as 3164

begin
@time_parser.parse(time_str)
rescue Fluent::TimeParser::TimeParseError => e
if @support_rfc5424_without_subseconds
log.trace(e)
@time_parser_rfc5424_without_subseconds.parse(time_str)
else
raise
end
end
end

if @keep_time_key
record['time'] = time_str
end
parse_plain(time, text, i + 1, record, RFC5424_CAPTURES, &block)
end

# @param time [EventTime]
# @param idx [Integer] note: this argument is needed to avoid string creation
# @param record [Hash]
# @param capture_list [Array] for performance
def parse_plain(time, text, idx, record, capture_list, &block)
m = @regexp.match(text, idx)
if m.nil?
yield nil, nil
return
end

capture_list.each { |name|
if value = (m[name] rescue nil)
case name
when "pri"
record['pri'] = value.to_i
when "time"
time = @mutex.synchronize do
time_str = value.squeeze(' ')
begin
@time_parser.parse(time_str)
rescue Fluent::TimeParser::TimeParseError => e
if @support_rfc5424_without_subseconds
log.trace(e)
@time_parser_rfc5424_without_subseconds.parse(time_str)
else
raise
end
end
end
record[name] = value if @keep_time_key
when "message"
value.chomp!
record[name] = value
Expand Down
Loading