Skip to content

Commit

Permalink
in_tail: Expand glob capability for square brackets and one character…
Browse files Browse the repository at this point in the history
… matcher (fluent#4401)

Signed-off-by: Hiroshi Hatake <[email protected]>
  • Loading branch information
cosmo0920 authored and daipom committed Apr 30, 2024
1 parent 940624e commit 638744d
Show file tree
Hide file tree
Showing 6 changed files with 175 additions and 2 deletions.
36 changes: 34 additions & 2 deletions lib/fluent/plugin/in_tail.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ def initialize
config_param :path, :string
desc 'path delimiter used for spliting path config'
config_param :path_delimiter, :string, default: ','
# glob_policy selects which characters make a path be expanded with Dir.glob
# (see #use_glob?):
#   :backward_compatible - only '*' triggers globbing (the default)
#   :extended            - '*', '?' and '[...]' trigger globbing; #configure
#                          rejects a path that combines '{a,b}' with one of
#                          those wildcards
#   :always              - the :extended set plus '{a,b}'; #configure rejects
#                          this policy when path_delimiter is the default ','
desc 'Choose using glob patterns. Adding capabilities to handle [] and ?, and {}.'
config_param :glob_policy, :enum, list: [:backward_compatible, :extended, :always], default: :backward_compatible
desc 'The tag of the event.'
config_param :tag, :string
desc 'The paths to exclude the files from watcher list.'
Expand Down Expand Up @@ -141,6 +143,14 @@ def configure(conf)
raise Fluent::ConfigError, "either of enable_watch_timer or enable_stat_watcher must be true"
end

if @glob_policy == :always && @path_delimiter == ','
raise Fluent::ConfigError, "cannot use glob_policy as always with the default path_delimitor: `,\""
end

if @glob_policy == :extended && /\{.*,.*\}/.match(@path) && extended_glob_pattern(@path)
raise Fluent::ConfigError, "cannot include curly braces with glob patterns in `#{@path}\". Use glob_policy always instead."
end

if RESERVED_CHARS.include?(@path_delimiter)
rc = RESERVED_CHARS.join(', ')
raise Fluent::ConfigError, "#{rc} are reserved words: #{@path_delimiter}"
Expand Down Expand Up @@ -288,6 +298,28 @@ def have_read_capability?
@capability.have_capability?(:effective, :dac_override)
end

# Checks whether +path+ contains any wildcard recognised by the extended
# glob policies: an asterisk, a '?' one-character matcher, or a '[...]'
# bracket expression.
#
# Returns true for '*' or '?', the MatchData of the bracket regexp when only
# brackets are present, and nil when nothing matches. Callers in this file
# rely on the truthiness of the result only.
def extended_glob_pattern(path)
  return true if ['*', '?'].any? { |wildcard| path.include?(wildcard) }

  /\[.*\]/.match(path)
end

# Decides whether +path+ has to be expanded with Dir.glob under the
# configured glob_policy.
#
# Curly braces contain a comma, which is also the default path_delimiter,
# so they are only treated as a glob pattern under :always; #configure
# rejects the conflicting combinations up front.
#
# Returns a truthy value when the path should be globbed, a falsy value
# otherwise (nil for a policy other than the three handled here).
def use_glob?(path)
  case @glob_policy
  when :always
    # Not a plain `true`: the path is still inspected so that the :always
    # policy can be extended with further pattern kinds later on.
    extended_glob_pattern(path) || /\{.*,.*\}/.match(path)
  when :extended
    extended_glob_pattern(path)
  when :backward_compatible
    path.include?('*')
  end
end

def expand_paths
date = Fluent::EventTime.now
paths = []
Expand All @@ -297,7 +329,7 @@ def expand_paths
else
date.to_time.strftime(path)
end
if path.include?('*')
if use_glob?(path)
paths += Dir.glob(path).select { |p|
begin
is_file = !File.directory?(p)
Expand Down Expand Up @@ -332,7 +364,7 @@ def expand_paths
else
date.to_time.strftime(path)
end
path.include?('*') ? Dir.glob(path) : path
use_glob?(path) ? Dir.glob(path) : path
}.flatten.uniq
# filter out non existing files, so in case pattern is without '*' we don't do unnecessary work
hash = {}
Expand Down
Empty file.
Empty file.
Empty file.
Empty file.
141 changes: 141 additions & 0 deletions test/plugin/test_in_tail.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1538,6 +1538,147 @@ def test_expand_paths
assert_equal(ex_paths - [ex_paths.last], plugin.expand_paths.values.sort_by { |path_ino| path_ino.path })
end

# Data-driven checks of #use_glob? (and of the #configure validation) for
# every glob_policy value.
sub_test_case "expand_paths with glob" do
  # Builds the in_tail configuration shared by the tests below.
  # +extra+ carries per-test additions such as a non-default path_delimiter.
  def glob_policy_config(path, option, extra = {})
    config_element("", "", {
      "tag" => "tail",
      "path" => path,
      "format" => "none",
      "pos_file" => "#{@tmp_dir}/tail.pos",
      "read_from_head" => true,
      "refresh_interval" => 30,
      "glob_policy" => option,
      "rotate_wait" => "#{EX_ROTATE_WAIT}s",
      "follow_inodes" => "#{EX_FOLLOW_INODES}",
    }.merge(extra))
  end

  sub_test_case "extended_glob" do
    # Each entry: [expected use_glob? result, glob_policy, path]
    data("curly braces" => [true, "always", "test/plugin/data/log_numeric/{0,1}*.log"],
         "square brackets" => [true, "always", "test/plugin/data/log_numeric/[0-1][2-4].log"],
         "asterisk" => [true, "always", "test/plugin/data/log/*.log"],
         "one character matcher" => [true, "always", "test/plugin/data/log/tes?.log"])
    def test_expand_paths_with_use_glob_p_and_almost_set_of_patterns
      result, option, path = data
      # "always" is rejected with the default "," delimiter, so use "|".
      config = glob_policy_config(path, option, "path_delimiter" => "|")
      plugin = create_driver(config, false).instance
      assert_equal(result, !!plugin.use_glob?(path))
    end

    # Each entry: [configure must raise?, expected use_glob? result, glob_policy, path]
    data("curly braces" => [true, false, "extended", "test/plugin/data/log_numeric/{0,1}*.log"],
         "square brackets" => [false, true, "extended", "test/plugin/data/log_numeric/[0-1][2-4].log"],
         "asterisk" => [false, true, "extended", "test/plugin/data/log/*.log"],
         "one character matcher" => [false, true, "extended", "test/plugin/data/log/tes?.log"])
    def test_expand_paths_with_use_glob_p
      emit_exception_p, result, option, path = data
      config = glob_policy_config(path, option)
      if emit_exception_p
        # Curly braces combined with another wildcard are a configuration
        # error under "extended".
        assert_raise(Fluent::ConfigError) do
          create_driver(config, false).instance
        end
      else
        plugin = create_driver(config, false).instance
        assert_equal(result, !!plugin.use_glob?(path))
      end
    end
  end

  sub_test_case "only_use_backward_compatible" do
    # Each entry: [expected use_glob? result, glob_policy, path]
    data("square brackets" => [false, "backward_compatible", "test/plugin/data/log_numeric/[0-1][2-4].log"],
         "asterisk" => [true, "backward_compatible", "test/plugin/data/log/*.log"],
         "one character matcher" => [false, "backward_compatible", "test/plugin/data/log/tes?.log"])
    def test_expand_paths_with_use_glob_p
      result, option, path = data
      plugin = create_driver(glob_policy_config(path, option), false).instance
      assert_equal(result, !!plugin.use_glob?(path))
    end
  end
end

# in_tail configuration whose path uses square-bracket character classes
# and whose glob_policy is "extended", so the brackets are globbed.
def ex_config_with_brackets
  params = {
    "tag" => "tail",
    "path" => "test/plugin/data/log_numeric/[0-1][2-4].log",
    "format" => "none",
    "pos_file" => "#{@tmp_dir}/tail.pos",
    "read_from_head" => true,
    "refresh_interval" => 30,
    "glob_policy" => "extended",
    "rotate_wait" => "#{EX_ROTATE_WAIT}s",
    "follow_inodes" => "#{EX_FOLLOW_INODES}",
  }
  config_element("", "", params)
end

# glob_policy "always" without an explicit path_delimiter keeps the default
# "," delimiter, which #configure must reject with a ConfigError.
def test_config_with_always_with_default_delimiter
  assert_raise(Fluent::ConfigError) do
    params = {
      "tag" => "tail",
      "path" => "test/plugin/data/log_numeric/[0-1][2-4].log",
      "format" => "none",
      "pos_file" => "#{@tmp_dir}/tail.pos",
      "read_from_head" => true,
      "refresh_interval" => 30,
      "glob_policy" => "always",
      "rotate_wait" => "#{EX_ROTATE_WAIT}s",
      "follow_inodes" => "#{EX_FOLLOW_INODES}",
    }
    driver = create_driver(config_element("", "", params), false)
    driver.instance
  end
end

# glob_policy "always" is accepted once path_delimiter is changed from the
# default "," (here to "|"), so configuring the plugin must not raise.
def test_config_with_always_with_custom_delimiter
  assert_nothing_raised do
    params = {
      "tag" => "tail",
      "path" => "test/plugin/data/log_numeric/[0-1][2-4].log",
      "format" => "none",
      "pos_file" => "#{@tmp_dir}/tail.pos",
      "read_from_head" => true,
      "refresh_interval" => 30,
      "glob_policy" => "always",
      "path_delimiter" => "|",
      "rotate_wait" => "#{EX_ROTATE_WAIT}s",
      "follow_inodes" => "#{EX_FOLLOW_INODES}",
    }
    driver = create_driver(config_element("", "", params), false)
    driver.instance
  end
end

# Expands the bracket pattern from ex_config_with_brackets and compares the
# result, ordered by path, with the candidate files minus the first one
# (01.log is listed as a candidate but is not expected in the expansion).
def test_expand_paths_with_brackets
  candidates = %w[
    test/plugin/data/log_numeric/01.log
    test/plugin/data/log_numeric/02.log
    test/plugin/data/log_numeric/12.log
    test/plugin/data/log_numeric/14.log
  ].map { |log_path| create_target_info(log_path) }

  plugin = create_driver(ex_config_with_brackets, false).instance
  expected = candidates - [candidates.first]
  actual = plugin.expand_paths.values.sort_by(&:path)
  assert_equal(expected, actual)
end

def test_expand_paths_with_duplicate_configuration
expanded_paths = [
create_target_info('test/plugin/data/log/foo/bar.log'),
Expand Down

0 comments on commit 638744d

Please sign in to comment.