diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 30a254fd15bf..3ceb2dc39107 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -229,6 +229,7 @@ otherwise no tag is added. {issue}42208[42208] {pull}42403[42403] - Fix timeout handling by Crowdstrike streaming input. {pull}44720[44720] - Ensure DEPROVISIONED Okta entities are published by Okta entityanalytics provider. {issue}12658[12658] {pull}44719[44719] - Fix handling of cursors by the streaming input for Crowdstrike. {issue}44364[44364] {pull}44548[44548] +- Added missing "text/csv" content-type filter support in azureblobsortorage input. {issue}44596[44596] {pull}44824[44824] - Fix unexpected EOF detection and improve memory usage. {pull}44813[44813] *Heartbeat* diff --git a/x-pack/filebeat/input/azureblobstorage/input_test.go b/x-pack/filebeat/input/azureblobstorage/input_test.go index 0e5441dcd438..f666eb1b2594 100644 --- a/x-pack/filebeat/input/azureblobstorage/input_test.go +++ b/x-pack/filebeat/input/azureblobstorage/input_test.go @@ -36,6 +36,7 @@ const ( beatsNdJSONContainer = "beatsndjsoncontainer" beatsGzJSONContainer = "beatsgzjsoncontainer" beatsJSONWithArrayContainer = "beatsjsonwitharraycontainer" + beatsCSVContainer = "beatscsvcontainer" ) func Test_StorageClient(t *testing.T) { @@ -471,6 +472,28 @@ func Test_StorageClient(t *testing.T) { mock.Beatscontainer_2_blob_data3_json: true, }, }, + { + name: "ReadCSV", + baseConfig: map[string]interface{}{ + "account_name": "beatsblobnew", + "auth.shared_credentials.account_key": "7pfLm1betGiRyyABEM/RFrLYlafLZHbLtGhB52LkWVeBxE7la9mIvk6YYAbQKYE/f0GdhiaOZeV8+AStsAdr/Q==", + "max_workers": 1, + "poll": true, + "poll_interval": "10s", + "decoding.codec.csv.enabled": true, + "decoding.codec.csv.comma": " ", + "containers": []map[string]interface{}{ + { + "name": beatsCSVContainer, + }, + }, + }, + mockHandler: mock.AzureStorageFileServer, + expected: map[string]bool{ + mock.BeatsFilesContainer_csv[0]: true, + mock.BeatsFilesContainer_csv[1]: true, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -540,7 +563,6 @@ func Test_StorageClient(t *testing.T) { var err error val, err = got.Fields.GetValue("message") assert.NoError(t, err) - assert.True(t, tt.expected[strings.ReplaceAll(val.(string), "\r\n", "\n")]) assert.Equal(t, tt.expectedError, err) receivedCount += 1 diff --git a/x-pack/filebeat/input/azureblobstorage/mock/data_files.go b/x-pack/filebeat/input/azureblobstorage/mock/data_files.go index 3273d329412f..a320df83a3e8 100644 --- a/x-pack/filebeat/input/azureblobstorage/mock/data_files.go +++ b/x-pack/filebeat/input/azureblobstorage/mock/data_files.go @@ -10,6 +10,7 @@ const ( beatsNdJSONContainer = "beatsndjsoncontainer" beatsGzJSONContainer = "beatsgzjsoncontainer" beatsJSONWithArrayContainer = "beatsjsonwitharraycontainer" + beatsCSVContainer = "beatscsvcontainer" ) var fileContainers = map[string]bool{ @@ -18,6 +19,7 @@ var fileContainers = map[string]bool{ beatsNdJSONContainer: true, beatsGzJSONContainer: true, beatsJSONWithArrayContainer: true, + beatsCSVContainer: true, } var availableFileBlobs = map[string]map[string]bool{ @@ -38,6 +40,9 @@ var availableFileBlobs = map[string]map[string]bool{ "array-at-root.json": true, "nested-arrays.json": true, }, + beatsCSVContainer: { + "txn1.csv": true, + }, } var fetchFilesContainer = map[string]string{ @@ -197,6 +202,30 @@ var fetchFilesContainer = map[string]string{ `, + beatsCSVContainer: ` + + + + txn1.csv + + Wed, 14 Sep 2022 12:12:28 GMT + 0x8DA964A64516C82 + 643 + text/csv + + + UjQX73kQRTHx+UyXZDvVkg== + + + BlockBlob + unlocked + available + + + + + + `, } var BeatsFilesContainer_multiline_json = []string{ @@ -231,3 +260,8 @@ var BeatsFilesContainer_multiline_json_gz = []string{ "{\n \"@timestamp\": \"2021-05-25T17:25:42.806Z\",\n \"log.level\": \"error\",\n \"message\": \"error making http request\"\n}", "{\n \"@timestamp\": \"2021-05-25T17:25:51.391Z\",\n \"log.level\": \"info\",\n \"message\": \"available disk space 44.3gb\"\n}", } + +var BeatsFilesContainer_csv = []string{ + "{\"id\":\"1\",\"name\":\"Alice\",\"email\":\"alice@example.com\",\"status\":\"active\"}", + "{\"id\":\"2\",\"name\":\"Bob\",\"email\":\"bob@example.com\",\"status\":\"inactive\"}", +} diff --git a/x-pack/filebeat/input/azureblobstorage/mock/mock.go b/x-pack/filebeat/input/azureblobstorage/mock/mock.go index 78c70b28df8d..37ebc0f3efba 100644 --- a/x-pack/filebeat/input/azureblobstorage/mock/mock.go +++ b/x-pack/filebeat/input/azureblobstorage/mock/mock.go @@ -78,6 +78,8 @@ func AzureStorageFileServer() http.Handler { w.Header().Set(contentType, jsonType) case "log.ndjson": w.Header().Set(contentType, "application/x-ndjson") + case "txn1.csv": + w.Header().Set(contentType, "text/csv") } w.Write(data) return diff --git a/x-pack/filebeat/input/azureblobstorage/mock/testdata/txn1.csv b/x-pack/filebeat/input/azureblobstorage/mock/testdata/txn1.csv new file mode 100644 index 000000000000..b78c38da9eb1 --- /dev/null +++ b/x-pack/filebeat/input/azureblobstorage/mock/testdata/txn1.csv @@ -0,0 +1,3 @@ +id name email status +1 Alice alice@example.com active +2 Bob bob@example.com inactive diff --git a/x-pack/filebeat/input/azureblobstorage/types.go b/x-pack/filebeat/input/azureblobstorage/types.go index 5654ba54f645..09d85c86f151 100644 --- a/x-pack/filebeat/input/azureblobstorage/types.go +++ b/x-pack/filebeat/input/azureblobstorage/types.go @@ -37,6 +37,7 @@ const ( octetType = "application/octet-stream" ndJsonType = "application/x-ndjson" gzType = "application/x-gzip" + csvType = "text/csv" encodingGzip = "gzip" ) @@ -59,4 +60,5 @@ var allowedContentTypes = map[string]bool{ octetType: true, ndJsonType: true, gzType: true, + csvType: true, }