diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index e85dcb6a479f..a4f0aac6954c 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -64,6 +64,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Not hiding error in case of http failure using elastic fetcher {pull}11604[11604] - Relax validation of the X-Pack license UID value. {issue}11640[11640] - Fix a parsing error with the X-Pack license check on 32-bit system. {issue}11650[11650] +- Escape BOM on JsonReader before trying to decode line {pull}11661[11661] - Fix ILM policy always being overwritten. {pull}11671[11671] - Fix template always being overwritten. {pull}11671[11671] - Fix matching of string arrays in contains condition. {pull}11691[11691] diff --git a/filebeat/input/log/harvester.go b/filebeat/input/log/harvester.go index 23cf62d26e43..469ae109da9d 100644 --- a/filebeat/input/log/harvester.go +++ b/filebeat/input/log/harvester.go @@ -29,7 +29,6 @@ package log import ( - "bytes" "errors" "fmt" "io" @@ -283,12 +282,6 @@ func (h *Harvester) Run() error { return nil } - // Strip UTF-8 BOM if beginning of file - // As all BOMS are converted to UTF-8 it is enough to only remove this one - if h.state.Offset == 0 { - message.Content = bytes.Trim(message.Content, "\xef\xbb\xbf") - } - // Get copy of state to work on // This is important in case sending is not successful so on shutdown // the old offset is reported diff --git a/libbeat/reader/readfile/encode.go b/libbeat/reader/readfile/encode.go index 920d9a209209..ada4ae91947b 100644 --- a/libbeat/reader/readfile/encode.go +++ b/libbeat/reader/readfile/encode.go @@ -18,6 +18,7 @@ package readfile import ( + "bytes" "io" "time" @@ -53,7 +54,7 @@ func (r EncoderReader) Next() (reader.Message, error) { // Creating message object return reader.Message{ Ts: time.Now(), - Content: c, + Content: bytes.Trim(c, "\xef\xbb\xbf"), Bytes: sz, }, err } diff --git a/libbeat/reader/readfile/encode_test.go 
b/libbeat/reader/readfile/encode_test.go
new file mode 100644
index 000000000000..36d3df433ec5
--- /dev/null
+++ b/libbeat/reader/readfile/encode_test.go
@@ -0,0 +1,88 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package readfile
+
+import (
+	"bytes"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/elastic/beats/libbeat/reader/readfile/encoding"
+)
+
+// TestEncodeLines feeds raw input through an EncoderReader configured with the
+// "plain" encoding and a line-feed terminator, and checks the content of every
+// message returned by Next. The cases cover a UTF-8 byte order mark (BOM,
+// "\xef\xbb\xbf") at the start of the first line, at the start of every line,
+// and in the middle of a line; only the BOM in the middle of a line is
+// expected to be preserved.
+func TestEncodeLines(t *testing.T) {
+	testCases := map[string]struct {
+		Input  []byte
+		Output []string
+	}{
+		"simple":            {[]byte("testing simple line\n"), []string{"testing simple line\n"}},
+		"multiline":         {[]byte("testing\nmultiline\n"), []string{"testing\n", "multiline\n"}},
+		"bom-on-first":      {[]byte("\xef\xbb\xbftesting simple line\n"), []string{"testing simple line\n"}},
+		"bom-on-each":       {[]byte("\xef\xbb\xbftesting\n\xef\xbb\xbfmultiline\n"), []string{"testing\n", "multiline\n"}},
+		"bom-in-the-middle": {[]byte("testing simple \xef\xbb\xbfline\n"), []string{"testing simple \xef\xbb\xbfline\n"}},
+	}
+
+	bufferSize := 1000
+	encFactory, ok := encoding.FindEncoding("plain")
+	if !ok {
+		t.Fatal("failed to initiate encoding")
+	}
+
+	for name, testCase := range testCases {
+		t.Run(name, func(t *testing.T) {
+			r := bytes.NewReader(testCase.Input)
+			codec, err := encFactory(r)
+			if err != nil {
+				// Abort the subtest: without a codec the reader cannot be built.
+				t.Fatalf("failed to initialize encoding: %v", err)
+			}
+
+			config := Config{
+				Codec:      codec,
+				BufferSize: bufferSize,
+				Terminator: LineFeed,
+			}
+			er, err := NewEncodeReader(r, config)
+			if err != nil {
+				// Abort the subtest instead of calling Next on a reader
+				// that failed to construct.
+				t.Fatalf("failed to create new encoder: %v", err)
+			}
+
+			// Collect lines until Next reports any error; with the in-memory
+			// input this is expected to be the end-of-input condition.
+			var output []string
+			for {
+				msg, err := er.Next()
+				if err != nil {
+					break
+				}
+				output = append(output, string(msg.Content))
+			}
+
+			assert.Equal(t, testCase.Output, output)
+		})
+	}
+}