From 28a1c7a80a346bb4c1b2f729b9ec04d269282d00 Mon Sep 17 00:00:00 2001 From: Tudor Golubenco Date: Tue, 26 Apr 2016 13:37:43 +0200 Subject: [PATCH] Fix for #1466 The cause was a nil value in the incoming JSON, which the generic filtering code didn't expect. I considered adding a `recover` to generic filtering so that things like this don't crash the whole process, but decided against. One reason is that it's better to discover these things while we're still in alpha/beta. Second is that if we recover here, there could still be a crash later in filtering our outputs. Also took the opportunity to add a couple of system tests that combine json and generic filtering. --- CHANGELOG.asciidoc | 1 + filebeat/tests/files/logs/json_null.log | 1 + filebeat/tests/system/test_json.py | 82 +++++++++++++++++++++++++ libbeat/common/event.go | 5 ++ libbeat/common/event_test.go | 8 +++ 5 files changed, 97 insertions(+) create mode 100755 filebeat/tests/files/logs/json_null.log diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 08ee1f5a5509..268e0bafd07a 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -31,6 +31,7 @@ https://github.com/elastic/beats/compare/v5.0.0-alpha1...master[Check the HEAD d the location for DEB and RPM packages stays the same. {pull}1373[1373] - Fix issue with JSON decoding where `@timestamp` or `type` keys with the wrong type could cause Filebeat to crash. {issue}1378[1378] +- Fix issue with JSON decoding where values having `null` as values could crash Filebeat. {issue}1466[1466] *Winlogbeat* diff --git a/filebeat/tests/files/logs/json_null.log b/filebeat/tests/files/logs/json_null.log new file mode 100755 index 000000000000..009951044a10 --- /dev/null +++ b/filebeat/tests/files/logs/json_null.log @@ -0,0 +1 @@ +{"method":"GET","path":"/auth/authorize","headers":{"content-type":"application/json","content-length":4,"location":"http://devhost:3000/auth/return?backend=http%3A%2F%2Fdevhost%3A4000?code=2f4818fd-a396-4150-a96b-3ee430ebef17","access-control-allow-origin":"*","access-control-allow-headers":"Accept, Accept-Version, Content-Length, Content-MD5, Content-Type, Date, Api-Version, Response-Time","access-control-allow-methods":"GET","access-control-expose-headers":"Api-Version, Request-Id, Response-Time","connection":"Keep-Alive","content-md5":"N6YlnMDB2uKZp4Zkid/wvQ==","date":"Fri, 04 Mar 2016 00:34:16 GMT","server":"hypeapi","api-version":"1.0.0","request-id":"0f4b264f-c259-4fc3-bd23-facf4021c0be","response-time":682},"res":null,"level":"info","message":"Sent response: ","timestamp":"2016-03-04T00:34:16.846Z"} diff --git a/filebeat/tests/system/test_json.py b/filebeat/tests/system/test_json.py index bc13e0da1d74..836296232e05 100644 --- a/filebeat/tests/system/test_json.py +++ b/filebeat/tests/system/test_json.py @@ -241,3 +241,85 @@ def test_type_in_message(self): assert output[2]["type"] == "log" assert output[2]["json_error"] == \ "type not overwritten (not string)" + + def test_with_generic_filtering(self): + """ + It should work fine to combine JSON decoding with + removing fields via generic filtering. The test log file + in here also contains a null value. + """ + self.render_config_template( + path=os.path.abspath(self.working_dir) + "/log/*", + json=dict( + message_key="message", + keys_under_root=True, + overwrite_keys=True, + add_error_key=True, + ), + filter_enabled=True, + drop_fields=["headers.request-id"], + include_fields=None, + ) + + os.mkdir(self.working_dir + "/log/") + self.copy_files(["logs/json_null.log"], + source_dir="../files", + target_dir="log") + + proc = self.start_beat() + self.wait_until( + lambda: self.output_has(lines=1), + max_timeout=10) + proc.check_kill_and_wait() + + output = self.read_output( + required_fields=["@timestamp", "type"], + ) + assert len(output) == 1 + o = output[0] + + assert "headers.content-type" in o + assert "headers.request-id" not in o + assert o["res"] is None + + def test_with_generic_filtering_remove_headers(self): + """ + It should work fine to combine JSON decoding with + removing fields via generic filtering. The test log file + in here also contains a null value. + """ + self.render_config_template( + path=os.path.abspath(self.working_dir) + "/log/*", + json=dict( + message_key="message", + keys_under_root=True, + overwrite_keys=True, + add_error_key=True, + ), + filter_enabled=True, + drop_fields=["headers", "res"], + include_fields=None, + ) + + os.mkdir(self.working_dir + "/log/") + self.copy_files(["logs/json_null.log"], + source_dir="../files", + target_dir="log") + + proc = self.start_beat() + self.wait_until( + lambda: self.output_has(lines=1), + max_timeout=10) + proc.check_kill_and_wait() + + output = self.read_output( + required_fields=["@timestamp", "type"], + ) + assert len(output) == 1 + o = output[0] + + assert "headers.content-type" not in o + assert "headers.request-id" not in o + assert "res" not in o + assert o["method"] == "GET" + assert o["message"] == "Sent response: " diff --git a/libbeat/common/event.go b/libbeat/common/event.go index dbe44c830e65..fe40741217d9 100644 --- a/libbeat/common/event.go +++ b/libbeat/common/event.go @@ -29,6 +29,11 @@ func ConvertToGenericEvent(v MapStr) MapStr { for key, value := range v { + if value == nil { + // leave nil values alone + continue + } + switch value.(type) { case Time, *Time: continue diff --git a/libbeat/common/event_test.go b/libbeat/common/event_test.go index b8bc2dd9d2b5..95be8be5bcbd 100644 --- a/libbeat/common/event_test.go +++ b/libbeat/common/event_test.go @@ -74,6 +74,14 @@ func TestConvertNestedMapStr(t *testing.T) { "@timestamp": MustParseTime("2015-03-01T12:34:56.123Z"), }, }, + io{ + Input: MapStr{ + "env": nil, + }, + Output: MapStr{ + "env": nil, + }, + }, } for _, test := range tests {