Skip to content

Commit

Permalink
Merge pull request #2305 from fluent/in_tail-encoding-enhancement
Browse files Browse the repository at this point in the history
in_tail encoding enhancement
  • Loading branch information
repeatedly authored Feb 22, 2019
2 parents 8fdc207 + a41324c commit 08bfa39
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 3 deletions.
12 changes: 9 additions & 3 deletions lib/fluent/plugin/in_tail.rb
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,9 @@ def configure_encoding

@encoding = parse_encoding_param(@encoding) if @encoding
@from_encoding = parse_encoding_param(@from_encoding) if @from_encoding
if @encoding == @from_encoding
log.warn "'encoding' and 'from_encoding' are same encoding. No effect"
end
end

def parse_encoding_param(encoding_name)
Expand Down Expand Up @@ -657,6 +660,7 @@ class FIFO
# Prepares an empty line buffer tagged with +from_encoding+ and records
# whether lines will need transcoding to +encoding+.
#
# @param from_encoding [Encoding] encoding applied to the internal buffer and
#   to the line terminator
# @param encoding [Encoding] target encoding used when converting lines
def initialize(from_encoding, encoding)
  @from_encoding = from_encoding
  @encoding = encoding
  # Decide once whether conversion is needed so it is not re-compared per line.
  @need_enc = from_encoding != encoding
  # String.new returns a fresh mutable string; calling force_encoding on a
  # '' literal would mutate the literal itself, which raises FrozenError under
  # `frozen_string_literal: true` and warns on Rubies with chilled literals.
  @buffer = String.new.force_encoding(from_encoding)
  # Line terminator expressed in the buffer's encoding; frozen because it is
  # a shared constant for this instance.
  @eol = "\n".encode(from_encoding).freeze
end
Expand All @@ -682,11 +686,13 @@ def <<(chunk)
end

# Transcodes +s+ in place from @from_encoding to @encoding when the two
# differ (@need_enc); otherwise returns +s+ untouched.
#
# If the strict conversion fails, the conversion is retried with invalid and
# undefined sequences replaced by the target encoding's replacement
# character, so a single bad byte does not abort the line.
#
# @param s [String] mutable string to convert
# @return [String] the same string object, converted when required
def convert(s)
  if @need_enc
    s.encode!(@encoding, @from_encoding)
  else
    s
  end
rescue EncodingError
  # Strict transcoding hit a byte sequence with no valid mapping; fall back
  # to a lossy conversion instead of raising.
  s.encode!(@encoding, @from_encoding, :invalid => :replace, :undef => :replace)
end

def next_line
Expand Down
20 changes: 20 additions & 0 deletions test/plugin/test_in_tail.rb
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,26 @@ def test_from_encoding_utf16
assert_equal(Encoding::UTF_8, events[0][2]['message'].encoding)
end

# Writes a line containing the byte 0x86 to the tailed file, reads it as
# ASCII-8BIT with a utf-8 target encoding, and expects the emitted message to
# be UTF-8 with that byte replaced by U+FFFD.
def test_encoding_with_bad_character
  tail_conf = config_element("", "", {
    "format" => "/(?<message>.*)/",
    "read_from_head" => "true",
    "from_encoding" => "ASCII-8BIT",
    "encoding" => "utf-8"
  })
  driver = create_driver(tail_conf)

  driver.run(expect_emits: 1) do
    File.open("#{TMP_DIR}/tail.txt", "w") do |file|
      file.write "te\x86st\n"
    end
  end

  # Each event is indexed as in the original: element 2 holds the record.
  message = driver.events[0][2]['message']
  assert_equal("te\uFFFDst", message)
  assert_equal(Encoding::UTF_8, message.encoding)
end

sub_test_case "multiline" do
data(flat: MULTILINE_CONFIG,
Expand Down

0 comments on commit 08bfa39

Please sign in to comment.