Skip to content

Commit

Permalink
Add from_encoding parameter to in_tail plugin (fluent#1067)
Browse files Browse the repository at this point in the history
* Add test code to check from_encoding param

Add a configuration check for the new param "from_encoding",
and test conversion of an input source containing "Hello world" in Japanese hiragana,
encoded in cp932, to UTF-8.

* Add from_encoding param to in_tail plugin

Add new param "from_encoding".
If only the "encoding" param is specified, processing is the same as before
to keep backward compatibility.
If two params, "encoding" and "from_encoding" are specified, process uses
```String.encode!(to, from)```.

* Specify type of encoding and from_encoding

* Fix configuration encoding and from_encoding

* Fix test code

Add a test pattern where only from_encoding is specified.

* Fix configure_encoding

Use log instead of $log.
Change the log messages so that they are easy for users to understand.
Return as soon as possible, if encoding parameters are not specified.

* Use ConfigError instead of a warning log

I deleted the 2nd log message.
Changed to raise an error if the 'encoding' and 'from_encoding' parameters are misconfigured.

* Fix test code

Corresponding to ConfigError

* Change implementation of encode statement

For most users, the default setting should be faster than
a setting with an encoding specified.
  • Loading branch information
footaku authored and ganmacs committed Aug 31, 2016
1 parent a67b01d commit 146f21c
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 9 deletions.
46 changes: 37 additions & 9 deletions lib/fluent/plugin/in_tail.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,10 @@ def initialize
config_param :multiline_flush_interval, :time, default: nil
desc 'Enable the additional watch timer.'
config_param :enable_watch_timer, :bool, default: true
desc 'The encoding after conversion of the input.'
config_param :encoding, :string, default: nil
desc 'The encoding of the input.'
config_param :encoding, default: nil do |encoding_name|
begin
Encoding.find(encoding_name)
rescue ArgumentError => e
raise ConfigError, e.message
end
end
config_param :from_encoding, :string, default: nil
desc 'Add the log path being tailed to records. Specify the field name to be used.'
config_param :path_key, :string, default: nil

Expand All @@ -95,6 +91,7 @@ def configure(conf)

configure_parser(conf)
configure_tag
configure_encoding

@multiline_mode = conf['format'] =~ /multiline/
@receive_handler = if @multiline_mode
Expand All @@ -120,6 +117,25 @@ def configure_tag
end
end

# Validates and resolves the encoding-related parameters.
#
# 'from_encoding' is only accepted together with 'encoding'; when it is set
# on its own a ConfigError is raised. Every parameter that is set is then
# replaced by the value returned from parse_encoding_param.
def configure_encoding
  # A source encoding without a target encoding is a configuration mistake.
  if @from_encoding && !@encoding
    raise ConfigError, "tail: 'from_encoding' parameter must be specified with 'encoding' parameter."
  end

  @encoding = parse_encoding_param(@encoding) if @encoding
  @from_encoding = parse_encoding_param(@from_encoding) if @from_encoding
end

# Looks up the Encoding object for an encoding name.
#
# encoding_name - name passed to Encoding.find; nil yields nil.
#
# Returns the Encoding found for the name, or nil when no name is given.
# Raises ConfigError (carrying the original message) when Encoding.find
# raises ArgumentError for the name.
def parse_encoding_param(encoding_name)
  return nil unless encoding_name

  Encoding.find(encoding_name)
rescue ArgumentError => e
  raise ConfigError, e.message
end

def start
super

Expand Down Expand Up @@ -254,7 +270,13 @@ def close_watcher_after_rotate_wait(tw)
def flush_buffer(tw)
if lb = tw.line_buffer
lb.chomp!
lb.force_encoding(@encoding) if @encoding
if @encoding
if @from_encoding
lb.encode!(@encoding, @from_encoding)
else
lb.force_encoding(@encoding)
end
end
@parser.parse(lb) { |time, record|
if time && record
tag = if @tag_prefix || @tag_suffix
Expand Down Expand Up @@ -303,7 +325,13 @@ def receive_lines(lines, tail_watcher)
def convert_line_to_event(line, es, tail_watcher)
begin
line.chomp! # remove \n
line.force_encoding(@encoding) if @encoding
if @encoding
if @from_encoding
line.encode!(@encoding, @from_encoding)
else
line.force_encoding(@encoding)
end
end
@parser.parse(line) { |time, record|
if time && record
record[@path_key] ||= tail_watcher.path unless @path_key.nil?
Expand Down
73 changes: 73 additions & 0 deletions test/plugin/test_in_tail.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,32 @@ def test_configure_encoding
end
end

# Checks how the 'from_encoding' parameter is handled at configure time:
# a config with from_encoding appended to SINGLE_LINE_CONFIG is rejected,
# from_encoding together with 'encoding' ends up as an Encoding object, and
# an unknown encoding name is rejected.
def test_configure_from_encoding
# Specifying only from_encoding raises ConfigError
# (presumably SINGLE_LINE_CONFIG does not set 'encoding' — TODO confirm)
assert_raise(Fluent::ConfigError) do
d = create_driver(SINGLE_LINE_CONFIG + 'from_encoding utf-8')
end

# valid setting: the configured name is exposed as an Encoding object
d = create_driver %[
format /(?<message>.*)/
read_from_head true
from_encoding utf-8
encoding utf-8
]
assert_equal Encoding::UTF_8, d.instance.from_encoding

# invalid from_encoding: an unknown encoding name raises ConfigError
assert_raise(Fluent::ConfigError) do
d = create_driver %[
format /(?<message>.*)/
read_from_head true
from_encoding no-such-encoding
encoding utf-8
]
end
end

# TODO: Use a better approach than waiting with sleep

def test_emit
Expand Down Expand Up @@ -403,6 +429,28 @@ def test_encoding(data)
assert_equal(encoding, emits[0][2]['message'].encoding)
end

# Checks that a line written in CP932 is emitted as UTF-8 when the plugin is
# configured with 'from_encoding cp932' and 'encoding utf-8'.
def test_from_encoding
d = create_driver %[
format /(?<message>.*)/
read_from_head true
from_encoding cp932
encoding utf-8
]

d.run do
# presumably gives the plugin time to start before the file is written — TODO confirm
sleep 1

# Write one line of CP932-encoded bytes to the tailed file.
File.open("#{TMP_DIR}/tail.txt", "w:cp932") {|f|
f.puts "\x82\xCD\x82\xEB\x81\x5B\x82\xED\x81\x5B\x82\xE9\x82\xC7".force_encoding(Encoding::CP932)
}
# presumably gives the plugin time to read the new line — TODO confirm
sleep 1
end

emits = d.emits
# The emitted message must equal the UTF-8 transcoding of the written bytes
# and must itself be tagged as UTF-8.
assert_equal("\x82\xCD\x82\xEB\x81\x5B\x82\xED\x81\x5B\x82\xE9\x82\xC7".force_encoding(Encoding::CP932).encode(Encoding::UTF_8), emits[0][2]['message'])
assert_equal(Encoding::UTF_8, emits[0][2]['message'].encoding)
end

# multiline mode test

def test_multiline
Expand Down Expand Up @@ -507,6 +555,31 @@ def test_multiline_encoding_of_flushed_record(data)
end
end

# Checks that, in multiline mode, a CP932 line is emitted as UTF-8 when the
# plugin is configured with 'from_encoding cp932' and 'encoding utf-8'
# (multiline_flush_interval is set to 2s in this config).
def test_multiline_from_encoding_of_flushed_record
d = create_driver %[
format multiline
format1 /^s (?<message1>[^\\n]+)(\\nf (?<message2>[^\\n]+))?(\\nf (?<message3>.*))?/
format_firstline /^[s]/
multiline_flush_interval 2s
read_from_head true
from_encoding cp932
encoding utf-8
]

d.run do
# presumably gives the plugin time to start before the file is written — TODO confirm
sleep 1
# Write a single first-line record ("s " prefix) in CP932 with no follow-up lines.
File.open("#{TMP_DIR}/tail.txt", "w:cp932") { |f|
f.puts "s \x82\xCD\x82\xEB\x81\x5B\x82\xED\x81\x5B\x82\xE9\x82\xC7".force_encoding(Encoding::CP932)
}

# presumably long enough for the 2s multiline_flush_interval to elapse — TODO confirm
sleep 4
emits = d.emits
# Exactly one event is expected, with message1 transcoded to UTF-8.
assert_equal(1, emits.length)
assert_equal("\x82\xCD\x82\xEB\x81\x5B\x82\xED\x81\x5B\x82\xE9\x82\xC7".force_encoding(Encoding::CP932).encode(Encoding::UTF_8), emits[0][2]['message1'])
assert_equal(Encoding::UTF_8, emits[0][2]['message1'].encoding)
end
end

def test_multiline_with_multiple_formats
File.open("#{TMP_DIR}/tail.txt", "wb") { |f| }

Expand Down

0 comments on commit 146f21c

Please sign in to comment.