Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

in_tail: Expand glob capability for square brackets and one character matcher #4401

Merged
merged 13 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 34 additions & 2 deletions lib/fluent/plugin/in_tail.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ def initialize
config_param :path, :string
# Separator used to split the `path` setting into individual entries.
# The default ',' cannot be combined with glob_policy `always`
# (#configure raises Fluent::ConfigError for that combination).
desc 'path delimiter used for spliting path config'
config_param :path_delimiter, :string, default: ','
# Selects which characters make a path be treated as a glob pattern
# (see #use_glob?):
#   backward_compatible - only `*`
#   extended            - `*`, `?` and `[...]`
#   always              - `*`, `?`, `[...]` and `{a,b}`
desc 'Choose using glob patterns. Adding capabilities to handle [] and ?, and {}.'
config_param :glob_policy, :enum, list: [:backward_compatible, :extended, :always], default: :backward_compatible
desc 'The tag of the event.'
config_param :tag, :string
desc 'The paths to exclude the files from watcher list.'
Expand Down Expand Up @@ -140,6 +142,14 @@ def configure(conf)
raise Fluent::ConfigError, "either of enable_watch_timer or enable_stat_watcher must be true"
end

# With `always`, `{a,b}` is treated as a glob pattern (see #use_glob?).
# Its inner comma is ambiguous with the default ',' path delimiter, so
# that combination is rejected up front.
if @glob_policy == :always && @path_delimiter == ','
  raise Fluent::ConfigError, "cannot use glob_policy as always with the default path_delimiter: `,\""
end

# `extended` does not treat curly braces as a glob pattern. A path that
# mixes `{a,b}` with other glob characters is refused and the user is
# pointed to `always` instead.
if @glob_policy == :extended && /\{.*,.*\}/.match?(@path) && extended_glob_pattern(@path)
  raise Fluent::ConfigError, "cannot include curly braces with glob patterns in `#{@path}\". Use glob_policy always instead."
end

if RESERVED_CHARS.include?(@path_delimiter)
rc = RESERVED_CHARS.join(', ')
raise Fluent::ConfigError, "#{rc} are reserved words: #{@path_delimiter}"
Expand Down Expand Up @@ -283,6 +293,28 @@ def have_read_capability?
@capability.have_capability?(:effective, :dac_override)
end

# Reports whether +path+ contains one of the extended glob constructs:
# an asterisk, a one-character matcher (`?`), or a square-bracket
# group (`[...]`). Curly braces are deliberately not checked here.
#
# path - String path or pattern to inspect.
#
# Returns true or false. `match?` is used so no MatchData is allocated
# and callers always get a strict boolean.
def extended_glob_pattern(path)
  path.include?('*') || path.include?('?') || /\[.*\]/.match?(path)
end

# Decides whether +path+ should be expanded with Dir.glob, according to
# the configured glob_policy:
#
#   :always              - `*`, `?`, `[...]` or `{a,b}` triggers globbing
#   :extended            - `*`, `?` or `[...]` triggers globbing
#   :backward_compatible - only `*` triggers globbing
#
# Curly braces are not supported with the default path_delimiter because
# that delimiter is ",", which collides with the separator used inside
# `{a,b}`. That combination is rejected as an error in #configure.
#
# path - String path or pattern to inspect.
#
# Returns a truthy value when the path must be globbed, a falsy value
# otherwise (nil when glob_policy is none of the values above).
def use_glob?(path)
  case @glob_policy
  when :always
    # For future extensions, the term `always' was chosen so that pattern
    # handling can be widened as much as possible. That is why this
    # branch still inspects the path instead of returning a bare `true'.
    extended_glob_pattern(path) || /\{.*,.*\}/.match?(path)
  when :extended
    extended_glob_pattern(path)
  when :backward_compatible
    path.include?('*')
  end
end

def expand_paths
date = Fluent::EventTime.now
paths = []
Expand All @@ -292,7 +324,7 @@ def expand_paths
else
date.to_time.strftime(path)
end
if path.include?('*')
if use_glob?(path)
paths += Dir.glob(path).select { |p|
begin
is_file = !File.directory?(p)
Expand Down Expand Up @@ -327,7 +359,7 @@ def expand_paths
else
date.to_time.strftime(path)
end
path.include?('*') ? Dir.glob(path) : path
use_glob?(path) ? Dir.glob(path) : path
}.flatten.uniq
# filter out non-existing files, so in case the pattern is not treated as a glob we don't do unnecessary work
hash = {}
Expand Down
Empty file.
Empty file.
Empty file.
Empty file.
141 changes: 141 additions & 0 deletions test/plugin/test_in_tail.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1538,6 +1538,147 @@ def test_expand_paths
assert_equal(ex_paths - [ex_paths.last], plugin.expand_paths.values.sort_by { |path_ino| path_ino.path })
end

# Data-driven checks of the plugin's use_glob? decision for every
# glob_policy value.
sub_test_case "expand_paths with glob" do
  sub_test_case "extended_glob" do
    # Each entry: [expected use_glob? result, glob_policy, path].
    data("curly braces" => [true, "always", "test/plugin/data/log_numeric/{0,1}*.log"],
         "square brackets" => [true, "always", "test/plugin/data/log_numeric/[0-1][2-4].log"],
         "asterisk" => [true, "always", "test/plugin/data/log/*.log"],
         "one character matcher" => [true, "always", "test/plugin/data/log/tes?.log"],
        )
    def test_expand_paths_with_use_glob_p_and_almost_set_of_patterns
      result, option, path = data
      # glob_policy "always" is rejected with the default "," delimiter,
      # so a custom path_delimiter is configured here.
      config = config_element("", "", {
                                "tag" => "tail",
                                "path" => path,
                                "format" => "none",
                                "pos_file" => "#{@tmp_dir}/tail.pos",
                                "read_from_head" => true,
                                "refresh_interval" => 30,
                                "glob_policy" => option,
                                "path_delimiter" => "|",
                                "rotate_wait" => "#{EX_ROTATE_WAIT}s",
                                "follow_inodes" => "#{EX_FOLLOW_INODES}",
                              })
      plugin = create_driver(config, false).instance
      assert_equal(result, !!plugin.use_glob?(path))
    end

    # Each entry: [configure must raise?, expected use_glob? result,
    # glob_policy, path]. The expected result is only checked when
    # configure is not expected to raise.
    data("curly braces" => [true, false, "extended", "test/plugin/data/log_numeric/{0,1}*.log"],
         "square brackets" => [false, true, "extended", "test/plugin/data/log_numeric/[0-1][2-4].log"],
         "asterisk" => [false, true, "extended", "test/plugin/data/log/*.log"],
         "one character matcher" => [false, true, "extended", "test/plugin/data/log/tes?.log"],
        )
    def test_expand_paths_with_use_glob_p
      emit_exception_p, result, option, path = data
      config = config_element("", "", {
                                "tag" => "tail",
                                "path" => path,
                                "format" => "none",
                                "pos_file" => "#{@tmp_dir}/tail.pos",
                                "read_from_head" => true,
                                "refresh_interval" => 30,
                                "glob_policy" => option,
                                "rotate_wait" => "#{EX_ROTATE_WAIT}s",
                                "follow_inodes" => "#{EX_FOLLOW_INODES}",
                              })
      if emit_exception_p
        # Curly braces combined with other glob characters are refused
        # under "extended", so building the driver must fail.
        assert_raise(Fluent::ConfigError) do
          create_driver(config, false).instance
        end
      else
        plugin = create_driver(config, false).instance
        assert_equal(result, !!plugin.use_glob?(path))
      end
    end
  end

  sub_test_case "only_use_backward_compatible" do
    # Each entry: [expected use_glob? result, glob_policy, path].
    # Only the asterisk is recognized under "backward_compatible".
    data("square brackets" => [false, "backward_compatible", "test/plugin/data/log_numeric/[0-1][2-4].log"],
         "asterisk" => [true, "backward_compatible", "test/plugin/data/log/*.log"],
         "one character matcher" => [false, "backward_compatible", "test/plugin/data/log/tes?.log"],
        )
    def test_expand_paths_with_use_glob_p
      result, option, path = data
      config = config_element("", "", {
                                "tag" => "tail",
                                "path" => path,
                                "format" => "none",
                                "pos_file" => "#{@tmp_dir}/tail.pos",
                                "read_from_head" => true,
                                "refresh_interval" => 30,
                                "glob_policy" => option,
                                "rotate_wait" => "#{EX_ROTATE_WAIT}s",
                                "follow_inodes" => "#{EX_FOLLOW_INODES}",
                              })
      plugin = create_driver(config, false).instance
      assert_equal(result, !!plugin.use_glob?(path))
    end
  end
end

# Builds a tail configuration that uses glob_policy "extended" with a
# path made of square-bracket character classes.
def ex_config_with_brackets
  params = {
    "tag" => "tail",
    "path" => "test/plugin/data/log_numeric/[0-1][2-4].log",
    "format" => "none",
    "pos_file" => "#{@tmp_dir}/tail.pos",
    "read_from_head" => true,
    "refresh_interval" => 30,
    "glob_policy" => "extended",
    "rotate_wait" => "#{EX_ROTATE_WAIT}s",
    "follow_inodes" => "#{EX_FOLLOW_INODES}",
  }
  config_element("", "", params)
end

# glob_policy "always" together with the default path_delimiter must be
# rejected with Fluent::ConfigError when the driver is created.
def test_config_with_always_with_default_delimiter
  params = {
    "tag" => "tail",
    "path" => "test/plugin/data/log_numeric/[0-1][2-4].log",
    "format" => "none",
    "pos_file" => "#{@tmp_dir}/tail.pos",
    "read_from_head" => true,
    "refresh_interval" => 30,
    "glob_policy" => "always",
    "rotate_wait" => "#{EX_ROTATE_WAIT}s",
    "follow_inodes" => "#{EX_FOLLOW_INODES}",
  }
  config = config_element("", "", params)

  assert_raise(Fluent::ConfigError) do
    create_driver(config, false).instance
  end
end

# glob_policy "always" is accepted once a non-default path_delimiter
# ("|" here) is configured.
def test_config_with_always_with_custom_delimiter
  params = {
    "tag" => "tail",
    "path" => "test/plugin/data/log_numeric/[0-1][2-4].log",
    "format" => "none",
    "pos_file" => "#{@tmp_dir}/tail.pos",
    "read_from_head" => true,
    "refresh_interval" => 30,
    "glob_policy" => "always",
    "path_delimiter" => "|",
    "rotate_wait" => "#{EX_ROTATE_WAIT}s",
    "follow_inodes" => "#{EX_FOLLOW_INODES}",
  }
  config = config_element("", "", params)

  assert_nothing_raised do
    create_driver(config, false).instance
  end
end

# With glob_policy "extended", the pattern [0-1][2-4].log must expand to
# the matching numeric log files only.
def test_expand_paths_with_brackets
  log_files = [
    'test/plugin/data/log_numeric/01.log',
    'test/plugin/data/log_numeric/02.log',
    'test/plugin/data/log_numeric/12.log',
    'test/plugin/data/log_numeric/14.log',
  ]
  targets = log_files.map { |log_file| create_target_info(log_file) }
  # The first entry (01.log) is excluded from the expectation: its second
  # digit falls outside the [2-4] class of the configured pattern.
  expected = targets - [targets.first]

  plugin = create_driver(ex_config_with_brackets, false).instance
  actual = plugin.expand_paths.values.sort_by(&:path)
  assert_equal(expected, actual)
end

def test_expand_paths_with_duplicate_configuration
expanded_paths = [
create_target_info('test/plugin/data/log/foo/bar.log'),
Expand Down
Loading