Skip to content

Commit

Permalink
Merge pull request #1927 from okkez/support-regexp-type
Browse files Browse the repository at this point in the history
Support regexp type
  • Loading branch information
repeatedly authored Apr 5, 2018
2 parents 03ce0bd + 72cff25 commit 0fa6a2d
Show file tree
Hide file tree
Showing 14 changed files with 84 additions and 78 deletions.
13 changes: 13 additions & 0 deletions lib/fluent/config/types.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,17 @@ def self.bool_value(str)
end
end

# Converts a configuration string into a Regexp.
#
# Two notations are accepted:
#
# * "/pattern/" optionally followed by the flags "i" (ignore case) and/or
#   "m" (multiline), e.g. "/^error/im". The text between the first and the
#   last slash is the pattern.
# * Any other string, which is compiled as-is.
#
# A string that merely starts with a slash but is not a well-formed
# "/pattern/flags" literal (no closing slash, or trailing text that is not
# made up of the supported flags) is compiled as-is as well, so that values
# such as "/var/log/messages" match literally instead of being silently
# truncated.
#
# @param str [String, nil] the raw configuration value
# @return [Regexp, nil] the compiled expression, or nil when str is nil
# @raise [RegexpError] if the pattern is not a valid regular expression
def self.regexp_value(str)
  return nil unless str
  return Regexp.compile(str) unless str.start_with?("/")

  closing = str.rindex("/")
  flags = str[(closing + 1)..-1]

  # closing == 0 means the only slash is the opening one, i.e. there is no
  # closing delimiter. Slicing the pattern in that case would yield "" and
  # produce a regexp that matches everything.
  # Likewise, trailing text containing anything other than "i"/"m" is part
  # of the pattern itself, not a flag suffix.
  return Regexp.compile(str) if closing.zero? || flags !~ /\A[im]*\z/

  option = 0
  option |= Regexp::IGNORECASE if flags.include?("i")
  option |= Regexp::MULTILINE if flags.include?("m")
  Regexp.compile(str[1...closing], option)
end

STRING_TYPE = Proc.new { |val, opts|
v = val.to_s
v = v.frozen? ? v.dup : v # config_param can't assume incoming string is mutable
Expand All @@ -89,6 +100,7 @@ def self.bool_value(str)
SIZE_TYPE = Proc.new { |val, opts| Config.size_value(val) }
BOOL_TYPE = Proc.new { |val, opts| Config.bool_value(val) }
TIME_TYPE = Proc.new { |val, opts| Config.time_value(val) }
REGEXP_TYPE = Proc.new { |val, opts| Config.regexp_value(val) }

REFORMAT_VALUE = ->(type, value) {
if value.nil?
Expand All @@ -101,6 +113,7 @@ def self.bool_value(str)
when :size then Config.size_value(value)
when :bool then Config.bool_value(value)
when :time then Config.time_value(value)
when :regexp then Config.regexp_value(value)
else
raise "unknown type in REFORMAT: #{type}"
end
Expand Down
1 change: 1 addition & 0 deletions lib/fluent/configurable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def self.lookup_type(type)
time: Config::TIME_TYPE,
hash: Config::HASH_TYPE,
array: Config::ARRAY_TYPE,
regexp: Config::REGEXP_TYPE,
}.each do |name, type|
register_type(name, type)
end
Expand Down
48 changes: 6 additions & 42 deletions lib/fluent/plugin/filter_grep.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,52 +45,28 @@ def initialize
desc "The field name to which the regular expression is applied."
config_param :key, :string
desc "The regular expression."
config_param :pattern do |value|
if value.start_with?("/") and value.end_with?("/")
Regexp.compile(value[1..-2])
else
Regexp.compile(value)
end
end
config_param :pattern, :regexp
end

config_section :exclude, param_name: :excludes, multi: true do
desc "The field name to which the regular expression is applied."
config_param :key, :string
desc "The regular expression."
config_param :pattern do |value|
if value.start_with?("/") and value.end_with?("/")
Regexp.compile(value[1..-2])
else
Regexp.compile(value)
end
end
config_param :pattern, :regexp
end

config_section :and, param_name: :and_conditions, multi: true do
config_section :regexp, param_name: :regexps, multi: true do
desc "The field name to which the regular expression is applied."
config_param :key, :string
desc "The regular expression."
config_param :pattern do |value|
if value.start_with?("/") and value.end_with?("/")
Regexp.compile(value[1..-2])
else
Regexp.compile(value)
end
end
config_param :pattern, :regexp
end
config_section :exclude, param_name: :excludes, multi: true do
desc "The field name to which the regular expression is applied."
config_param :key, :string
desc "The regular expression."
config_param :pattern do |value|
if value.start_with?("/") and value.end_with?("/")
Regexp.compile(value[1..-2])
else
Regexp.compile(value)
end
end
config_param :pattern, :regexp
end
end

Expand All @@ -99,25 +75,13 @@ def initialize
desc "The field name to which the regular expression is applied."
config_param :key, :string
desc "The regular expression."
config_param :pattern do |value|
if value.start_with?("/") and value.end_with?("/")
Regexp.compile(value[1..-2])
else
Regexp.compile(value)
end
end
config_param :pattern, :regexp
end
config_section :exclude, param_name: :excludes, multi: true do
desc "The field name to which the regular expression is applied."
config_param :key, :string
desc "The regular expression."
config_param :pattern do |value|
if value.start_with?("/") and value.end_with?("/")
Regexp.compile(value[1..-2])
else
Regexp.compile(value)
end
end
config_param :pattern, :regexp
end
end

Expand Down
2 changes: 1 addition & 1 deletion lib/fluent/plugin/parser_apache.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ module Plugin
class ApacheParser < RegexpParser
Plugin.register_parser("apache", self)

config_set_default :expression, %q{/^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/}
config_set_default :expression, /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
config_set_default :time_format, "%d/%b/%Y:%H:%M:%S %z"
end
end
Expand Down
2 changes: 1 addition & 1 deletion lib/fluent/plugin/parser_apache_error.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ module Fluent
module Plugin
class ApacheErrorParser < RegexpParser
Plugin.register_parser("apache_error", self)
config_set_default :expression, %q{/^\[[^ ]* (?<time>[^\]]*)\] \[(?<level>[^\]]*)\](?: \[pid (?<pid>[^\]]*)\])?( \[client (?<client>[^\]]*)\])? (?<message>.*)$/}
config_set_default :expression, /^\[[^ ]* (?<time>[^\]]*)\] \[(?<level>[^\]]*)\](?: \[pid (?<pid>[^\]]*)\])?( \[client (?<client>[^\]]*)\])? (?<message>.*)$/
end
end
end
4 changes: 1 addition & 3 deletions lib/fluent/plugin/parser_ltsv.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ class LabeledTSVParser < Parser
desc 'The delimiter character (or string) of TSV values'
config_param :delimiter, :string, default: "\t"
desc 'The delimiter pattern of TSV values'
config_param :delimiter_pattern, default: nil do |value|
Regexp.compile(value[1..-2]) if value
end
config_param :delimiter_pattern, :regexp, default: nil
desc 'The delimiter character between field name and value'
config_param :label_delimiter, :string, default: ":"

Expand Down
2 changes: 1 addition & 1 deletion lib/fluent/plugin/parser_multiline.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def configure(conf)
if regexp.named_captures.empty?
raise "No named captures"
end
regexp_conf = Fluent::Config::Element.new("", "", { "expression" => "/#{formats}/", "multiline" => true }, [])
regexp_conf = Fluent::Config::Element.new("", "", { "expression" => "/#{formats}/m" }, [])
@parser = Fluent::Plugin::RegexpParser.new
@parser.configure(conf + regexp_conf)
rescue => e
Expand Down
2 changes: 1 addition & 1 deletion lib/fluent/plugin/parser_nginx.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ module Plugin
class NginxParser < RegexpParser
Plugin.register_parser("nginx", self)

config_set_default :expression, %q{/^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/}
config_set_default :expression, /^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
config_set_default :time_format, "%d/%b/%Y:%H:%M:%S %z"
end
end
Expand Down
26 changes: 12 additions & 14 deletions lib/fluent/plugin/parser_regexp.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,28 @@ class RegexpParser < Parser
Plugin.register_parser("regexp", self)

desc 'Regular expression for matching logs'
config_param :expression, :string
config_param :expression, :regexp
desc 'Ignore case in matching'
config_param :ignorecase, :bool, default: false
config_param :ignorecase, :bool, default: false, deprecated: "Use /pattern/i instead, this option is no longer effective"
desc 'Build regular expression as a multline mode'
config_param :multiline, :bool, default: false
config_param :multiline, :bool, default: false, deprecated: "Use /pattern/m instead, this option is no longer effective"

config_set_default :time_key, 'time'

def configure(conf)
super

expr = if @expression[0] == "/" && @expression[-1] == "/"
@expression[1..-2]
else
@expression
end
regexp_option = 0
regexp_option |= Regexp::IGNORECASE if @ignorecase
regexp_option |= Regexp::MULTILINE if @multiline
@regexp = Regexp.new(expr, regexp_option)
# For compat layer
if @ignorecase || @multiline
options = 0
options |= Regexp::IGNORECASE if @ignorecase
options |= Regexp::MULTILINE if @multiline
@expression = Regexp.compile(@expression.source, options)
end
@regexp = @expression # For backward compatibility
end

def parse(text)
m = @regexp.match(text)
m = @expression.match(text)
unless m
yield nil, nil
return
Expand Down
5 changes: 5 additions & 0 deletions test/config/test_config_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class AllTypes
config_param :param_time, :time
config_param :param_hash, :hash
config_param :param_array, :array
config_param :param_regexp, :regexp
end

class TestV1Parser < ::Test::Unit::TestCase
Expand Down Expand Up @@ -451,6 +452,7 @@ def prepare_config
param_time 10m
param_hash { "key1": "value1", "key2": 2 }
param_array ["value1", "value2", 100]
param_regexp /pattern/
])
target = AllTypes.new.configure(conf)
assert_equal(conf.to_s, target.config.to_s)
Expand All @@ -465,6 +467,7 @@ def prepare_config
param_time 10m
param_hash {"key1":"value1","key2":2}
param_array ["value1","value2",100]
param_regexp /pattern/
</ROOT>
DUMP
assert_equal(expected, conf.to_s)
Expand All @@ -490,6 +493,7 @@ def parse_text(text)
param_time 10m
param_hash { "key1": "value1", "key2": 2 }
param_array ["value1", "value2", 100]
param_regexp /pattern/
])
target = AllTypes.new.configure(conf)
assert_equal(conf.to_s, target.config.to_s)
Expand All @@ -504,6 +508,7 @@ def parse_text(text)
param_time 10m
param_hash { "key1": "value1", "key2": 2 }
param_array ["value1", "value2", 100]
param_regexp /pattern/
</ROOT>
DUMP
assert_equal(expected, conf.to_s)
Expand Down
7 changes: 4 additions & 3 deletions test/config/test_configure_proxy.rb
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,10 @@ class TestConfigureProxy < ::Test::Unit::TestCase
assert_nothing_raised{ @proxy.config_param(:p7, :time, **opt) }
assert_nothing_raised{ @proxy.config_param(:p8, :hash, **opt) }
assert_nothing_raised{ @proxy.config_param(:p9, :array, **opt) }
assert_nothing_raised{ @proxy.config_param(:pa, :regexp, **opt) }
end

data(string: :string, integer: :integer, float: :float, size: :size, bool: :bool, time: :time, hash: :hash, array: :array)
data(string: :string, integer: :integer, float: :float, size: :size, bool: :bool, time: :time, hash: :hash, array: :array, regexp: :regexp)
test 'deny list for non-enum types' do |type|
assert_raise ArgumentError.new(":list is valid only for :enum type, but #{type}: arg") do
@proxy.config_argument(:arg, type, list: [:a, :b])
Expand All @@ -205,7 +206,7 @@ class TestConfigureProxy < ::Test::Unit::TestCase
end
end

data(string: :string, integer: :integer, float: :float, size: :size, bool: :bool, time: :time)
data(string: :string, integer: :integer, float: :float, size: :size, bool: :bool, time: :time, regexp: :regexp)
test 'deny value_type for non-hash/array types' do |type|
assert_raise ArgumentError.new(":value_type is valid only for :hash and :array, but #{type}: arg") do
@proxy.config_argument(:arg, type, value_type: :string)
Expand All @@ -215,7 +216,7 @@ class TestConfigureProxy < ::Test::Unit::TestCase
end
end

data(string: :string, integer: :integer, float: :float, size: :size, bool: :bool, time: :time, array: :array)
data(string: :string, integer: :integer, float: :float, size: :size, bool: :bool, time: :time, array: :array, regexp: :regexp)
test 'deny symbolize_keys for non-hash types' do |type|
assert_raise ArgumentError.new(":symbolize_keys is valid only for :hash, but #{type}: arg") do
@proxy.config_argument(:arg, type, symbolize_keys: true)
Expand Down
29 changes: 29 additions & 0 deletions test/config/test_types.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,26 @@ class TestConfigTypes < ::Test::Unit::TestCase
end
end

# Exercises Config.regexp_value with slash-delimited literals, trailing
# i/m flags, bare (undelimited) patterns and a nil value.
sub_test_case 'Config.regexp_value' do
  data("empty" => [//, "//"],
       "plain" => [/regexp/, "/regexp/"],
       "zero width" => [/^$/, "/^$/"],
       "character classes" => [/[a-z]/, "/[a-z]/"],
       "meta characters" => [/.+.*?\d\w\s\S/, '/.+.*?\d\w\s\S/'])
  test 'normal case' do |(expected, str)|
    assert_equal(expected, Config.regexp_value(str))
  end

  # Regexp#== compares both source and options, so these cases pin the
  # flag parsing as well as the pattern extraction.
  data("ignorecase" => [/regexp/i, "/regexp/i"],
       "multiline" => [/regexp/m, "/regexp/m"],
       "ignorecase and multiline" => [/regexp/im, "/regexp/im"],
       "reversed flag order" => [/regexp/im, "/regexp/mi"])
  test 'w/ options' do |(expected, str)|
    assert_equal(expected, Config.regexp_value(str))
  end

  data("empty" => [//, ""],
       "plain" => [/regexp/, "regexp"],
       "zero width" => [/^$/, "^$"],
       "character classes" => [/[a-z]/, "[a-z]"],
       "meta characters" => [/.+.*?\d\w\s\S/, '.+.*?\d\w\s\S'])
  test 'w/o slashes' do |(expected, str)|
    assert_equal(expected, Config.regexp_value(str))
  end

  test 'nil' do
    assert_nil(Config.regexp_value(nil))
  end
end

sub_test_case 'type converters for config_param definitions' do
test 'string' do
assert_equal 'test', Config::STRING_TYPE.call('test', {})
Expand Down Expand Up @@ -134,6 +154,15 @@ class TestConfigTypes < ::Test::Unit::TestCase
assert_equal 86400, Config::TIME_TYPE.call('1d', {})
end

data("empty" => [//, "//"],
"plain" => [/regexp/, "/regexp/"],
"zero width" => [/^$/, "/^$/"],
"character classes" => [/[a-z]/, "/[a-z]/"],
"meta charactersx" => [/.+.*?\d\w\s\S/, '/.+.*?\d\w\s\S/'])
test 'regexp' do |(expected, str)|
assert_equal(expected, Config::REGEXP_TYPE.call(str, {}))
end

test 'hash' do
assert_equal({"x"=>"v","k"=>1}, Config::HASH_TYPE.call('{"x":"v","k":1}', {}))
assert_equal({"x"=>"v","k"=>"1"}, Config::HASH_TYPE.call('x:v,k:1', {}))
Expand Down
8 changes: 3 additions & 5 deletions test/plugin/test_in_exec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,7 @@ def create_driver(conf)
d = create_driver REGEXP_CONFIG
assert{ d.instance.parser.is_a? Fluent::Plugin::RegexpParser }
assert_equal "regex_tag", d.instance.tag
expression = <<'EXP'.chomp
(?<time>[^\]]*) (?<message>[^ ]*)
EXP
expression = /(?<time>[^\]]*) (?<message>[^ ]*)/
assert_equal expression, d.instance.parser.expression
assert_nil d.instance.extract_config
end
Expand Down Expand Up @@ -194,8 +192,8 @@ def create_driver(conf)
test 'configure_with_regexp' do
d = create_driver REGEXP_CONFIG_COMPAT
assert{ d.instance.parser.is_a? Fluent::Plugin::RegexpParser }
assert_equal '(?<time>[^\]]*) (?<message>[^ ]*)', d.instance.parser.expression
assert_equal 'regex_tag', d.instance.tag
assert_equal(/(?<time>[^\]]*) (?<message>[^ ]*)/, d.instance.parser.expression)
assert_equal('regex_tag', d.instance.tag)
end
end

Expand Down
13 changes: 6 additions & 7 deletions test/plugin/test_parser_regexp.rb
Original file line number Diff line number Diff line change
Expand Up @@ -160,25 +160,24 @@ def test_default_options
'types' => 'user:string,date:time:%d/%b/%Y:%H:%M:%S %z,flag:bool,path:array,code:float,size:integer'
}
d = create_driver(conf)
regexp = d.instance.instance_variable_get(:@regexp)
regexp = d.instance.expression
assert_equal(0, regexp.options)
end

data(
ignorecase: [{ "ignorecase" => true }, Regexp::IGNORECASE],
multiline: [{ "multiline" => true }, Regexp::MULTILINE],
ignorecase_multiline: [{ "ignorecase" => true, "multiline" => true }, Regexp::IGNORECASE | Regexp::MULTILINE],
ignorecase: ["i", Regexp::IGNORECASE],
multiline: ["m", Regexp::MULTILINE],
ignorecase_multiline: ["im", Regexp::IGNORECASE | Regexp::MULTILINE],
)
def test_options(data)
regexp_option, expected = data
conf = {
'expression' => %q!/^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] \[(?<date>[^\]]*)\] "(?<flag>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)$/!,
'expression' => %Q!/^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] \[(?<date>[^\]]*)\] "(?<flag>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)$/#{regexp_option}!,
'time_format' => "%d/%b/%Y:%H:%M:%S %z",
'types' => 'user:string,date:time:%d/%b/%Y:%H:%M:%S %z,flag:bool,path:array,code:float,size:integer'
}
conf = conf.merge(regexp_option)
d = create_driver(conf)
regexp = d.instance.instance_variable_get(:@regexp)
regexp = d.instance.expression
assert_equal(expected, regexp.options)
end
end
Expand Down

0 comments on commit 0fa6a2d

Please sign in to comment.