From 772de35c01e809cca702cb0c25b9a43c1807af23 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Fri, 21 Jun 2019 12:33:10 -0600 Subject: [PATCH 01/59] Add S3 input to retrieve logs from AWS S3 buckets --- CHANGELOG.next.asciidoc | 1 + .../service/s3/s3iface/interface.go | 250 ++++++++++++++ vendor/vendor.json | 10 +- x-pack/filebeat/filebeat.yml | 14 +- x-pack/filebeat/include/list.go | 1 + x-pack/filebeat/input/s3/_meta/fields.yml | 19 + x-pack/filebeat/input/s3/config.go | 21 ++ x-pack/filebeat/input/s3/fields.go | 23 ++ x-pack/filebeat/input/s3/input.go | 324 ++++++++++++++++++ x-pack/filebeat/input/s3/input_test.go | 107 ++++++ 10 files changed, 764 insertions(+), 6 deletions(-) create mode 100644 vendor/github.com/aws/aws-sdk-go-v2/service/s3/s3iface/interface.go create mode 100644 x-pack/filebeat/input/s3/_meta/fields.yml create mode 100644 x-pack/filebeat/input/s3/config.go create mode 100644 x-pack/filebeat/input/s3/fields.go create mode 100644 x-pack/filebeat/input/s3/input.go create mode 100644 x-pack/filebeat/input/s3/input_test.go diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index e1ca89db9d4..b22e0fcc2c8 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -250,6 +250,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Parse more fields from Elasticsearch slowlogs. {pull}11939[11939] - Update module pipelines to enrich events with autonomous system fields. {pull}13036[13036] - Add module for ingesting IBM MQ logs. {pull}8782[8782] +- Add S3 input to retrieve logs from AWS S3 buckets. *Heartbeat* diff --git a/vendor/github.com/aws/aws-sdk-go-v2/service/s3/s3iface/interface.go b/vendor/github.com/aws/aws-sdk-go-v2/service/s3/s3iface/interface.go new file mode 100644 index 00000000000..cf4128ddc2c --- /dev/null +++ b/vendor/github.com/aws/aws-sdk-go-v2/service/s3/s3iface/interface.go @@ -0,0 +1,250 @@ +// Code generated by private/model/cli/gen-api/main.go. DO NOT EDIT. + +// Package s3iface provides an interface to enable mocking the Amazon Simple Storage Service service client +// for testing your code. +// +// It is important to note that this interface will have breaking changes +// when the service model is updated and adds new API operations, paginators, +// and waiters. +package s3iface + +import ( + "context" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/s3" +) + +// ClientAPI provides an interface to enable mocking the +// s3.Client methods. This make unit testing your code that +// calls out to the SDK's service client's calls easier. +// +// The best way to use this interface is so the SDK's service client's calls +// can be stubbed out for unit testing your code with the SDK without needing +// to inject custom request handlers into the SDK's request pipeline. +// +// // myFunc uses an SDK service client to make a request to +// // Amazon S3. +// func myFunc(svc s3iface.ClientAPI) bool { +// // Make svc.AbortMultipartUpload request +// } +// +// func main() { +// cfg, err := external.LoadDefaultAWSConfig() +// if err != nil { +// panic("failed to load config, " + err.Error()) +// } +// +// svc := s3.New(cfg) +// +// myFunc(svc) +// } +// +// In your _test.go file: +// +// // Define a mock struct to be used in your unit tests of myFunc. 
+// type mockClientClient struct { +// s3iface.ClientPI +// } +// func (m *mockClientClient) AbortMultipartUpload(input *s3.AbortMultipartUploadInput) (*s3.AbortMultipartUploadOutput, error) { +// // mock response/functionality +// } +// +// func TestMyFunc(t *testing.T) { +// // Setup Test +// mockSvc := &mockClientClient{} +// +// myfunc(mockSvc) +// +// // Verify myFunc's functionality +// } +// +// It is important to note that this interface will have breaking changes +// when the service model is updated and adds new API operations, paginators, +// and waiters. Its suggested to use the pattern above for testing, or using +// tooling to generate mocks to satisfy the interfaces. +type ClientAPI interface { + AbortMultipartUploadRequest(*s3.AbortMultipartUploadInput) s3.AbortMultipartUploadRequest + + CompleteMultipartUploadRequest(*s3.CompleteMultipartUploadInput) s3.CompleteMultipartUploadRequest + + CopyObjectRequest(*s3.CopyObjectInput) s3.CopyObjectRequest + + CreateBucketRequest(*s3.CreateBucketInput) s3.CreateBucketRequest + + CreateMultipartUploadRequest(*s3.CreateMultipartUploadInput) s3.CreateMultipartUploadRequest + + DeleteBucketRequest(*s3.DeleteBucketInput) s3.DeleteBucketRequest + + DeleteBucketAnalyticsConfigurationRequest(*s3.DeleteBucketAnalyticsConfigurationInput) s3.DeleteBucketAnalyticsConfigurationRequest + + DeleteBucketCorsRequest(*s3.DeleteBucketCorsInput) s3.DeleteBucketCorsRequest + + DeleteBucketEncryptionRequest(*s3.DeleteBucketEncryptionInput) s3.DeleteBucketEncryptionRequest + + DeleteBucketInventoryConfigurationRequest(*s3.DeleteBucketInventoryConfigurationInput) s3.DeleteBucketInventoryConfigurationRequest + + DeleteBucketLifecycleRequest(*s3.DeleteBucketLifecycleInput) s3.DeleteBucketLifecycleRequest + + DeleteBucketMetricsConfigurationRequest(*s3.DeleteBucketMetricsConfigurationInput) s3.DeleteBucketMetricsConfigurationRequest + + DeleteBucketPolicyRequest(*s3.DeleteBucketPolicyInput) s3.DeleteBucketPolicyRequest + + DeleteBucketReplicationRequest(*s3.DeleteBucketReplicationInput) s3.DeleteBucketReplicationRequest + + DeleteBucketTaggingRequest(*s3.DeleteBucketTaggingInput) s3.DeleteBucketTaggingRequest + + DeleteBucketWebsiteRequest(*s3.DeleteBucketWebsiteInput) s3.DeleteBucketWebsiteRequest + + DeleteObjectRequest(*s3.DeleteObjectInput) s3.DeleteObjectRequest + + DeleteObjectTaggingRequest(*s3.DeleteObjectTaggingInput) s3.DeleteObjectTaggingRequest + + DeleteObjectsRequest(*s3.DeleteObjectsInput) s3.DeleteObjectsRequest + + DeletePublicAccessBlockRequest(*s3.DeletePublicAccessBlockInput) s3.DeletePublicAccessBlockRequest + + GetBucketAccelerateConfigurationRequest(*s3.GetBucketAccelerateConfigurationInput) s3.GetBucketAccelerateConfigurationRequest + + GetBucketAclRequest(*s3.GetBucketAclInput) s3.GetBucketAclRequest + + GetBucketAnalyticsConfigurationRequest(*s3.GetBucketAnalyticsConfigurationInput) s3.GetBucketAnalyticsConfigurationRequest + + GetBucketCorsRequest(*s3.GetBucketCorsInput) s3.GetBucketCorsRequest + + GetBucketEncryptionRequest(*s3.GetBucketEncryptionInput) s3.GetBucketEncryptionRequest + + GetBucketInventoryConfigurationRequest(*s3.GetBucketInventoryConfigurationInput) s3.GetBucketInventoryConfigurationRequest + + GetBucketLifecycleRequest(*s3.GetBucketLifecycleInput) s3.GetBucketLifecycleRequest + + GetBucketLifecycleConfigurationRequest(*s3.GetBucketLifecycleConfigurationInput) s3.GetBucketLifecycleConfigurationRequest + + GetBucketLocationRequest(*s3.GetBucketLocationInput) s3.GetBucketLocationRequest + + 
GetBucketLoggingRequest(*s3.GetBucketLoggingInput) s3.GetBucketLoggingRequest + + GetBucketMetricsConfigurationRequest(*s3.GetBucketMetricsConfigurationInput) s3.GetBucketMetricsConfigurationRequest + + GetBucketNotificationRequest(*s3.GetBucketNotificationInput) s3.GetBucketNotificationRequest + + GetBucketNotificationConfigurationRequest(*s3.GetBucketNotificationConfigurationInput) s3.GetBucketNotificationConfigurationRequest + + GetBucketPolicyRequest(*s3.GetBucketPolicyInput) s3.GetBucketPolicyRequest + + GetBucketPolicyStatusRequest(*s3.GetBucketPolicyStatusInput) s3.GetBucketPolicyStatusRequest + + GetBucketReplicationRequest(*s3.GetBucketReplicationInput) s3.GetBucketReplicationRequest + + GetBucketRequestPaymentRequest(*s3.GetBucketRequestPaymentInput) s3.GetBucketRequestPaymentRequest + + GetBucketTaggingRequest(*s3.GetBucketTaggingInput) s3.GetBucketTaggingRequest + + GetBucketVersioningRequest(*s3.GetBucketVersioningInput) s3.GetBucketVersioningRequest + + GetBucketWebsiteRequest(*s3.GetBucketWebsiteInput) s3.GetBucketWebsiteRequest + + GetObjectRequest(*s3.GetObjectInput) s3.GetObjectRequest + + GetObjectAclRequest(*s3.GetObjectAclInput) s3.GetObjectAclRequest + + GetObjectLegalHoldRequest(*s3.GetObjectLegalHoldInput) s3.GetObjectLegalHoldRequest + + GetObjectLockConfigurationRequest(*s3.GetObjectLockConfigurationInput) s3.GetObjectLockConfigurationRequest + + GetObjectRetentionRequest(*s3.GetObjectRetentionInput) s3.GetObjectRetentionRequest + + GetObjectTaggingRequest(*s3.GetObjectTaggingInput) s3.GetObjectTaggingRequest + + GetObjectTorrentRequest(*s3.GetObjectTorrentInput) s3.GetObjectTorrentRequest + + GetPublicAccessBlockRequest(*s3.GetPublicAccessBlockInput) s3.GetPublicAccessBlockRequest + + HeadBucketRequest(*s3.HeadBucketInput) s3.HeadBucketRequest + + HeadObjectRequest(*s3.HeadObjectInput) s3.HeadObjectRequest + + ListBucketAnalyticsConfigurationsRequest(*s3.ListBucketAnalyticsConfigurationsInput) s3.ListBucketAnalyticsConfigurationsRequest + + ListBucketInventoryConfigurationsRequest(*s3.ListBucketInventoryConfigurationsInput) s3.ListBucketInventoryConfigurationsRequest + + ListBucketMetricsConfigurationsRequest(*s3.ListBucketMetricsConfigurationsInput) s3.ListBucketMetricsConfigurationsRequest + + ListBucketsRequest(*s3.ListBucketsInput) s3.ListBucketsRequest + + ListMultipartUploadsRequest(*s3.ListMultipartUploadsInput) s3.ListMultipartUploadsRequest + + ListObjectVersionsRequest(*s3.ListObjectVersionsInput) s3.ListObjectVersionsRequest + + ListObjectsRequest(*s3.ListObjectsInput) s3.ListObjectsRequest + + ListObjectsV2Request(*s3.ListObjectsV2Input) s3.ListObjectsV2Request + + ListPartsRequest(*s3.ListPartsInput) s3.ListPartsRequest + + PutBucketAccelerateConfigurationRequest(*s3.PutBucketAccelerateConfigurationInput) s3.PutBucketAccelerateConfigurationRequest + + PutBucketAclRequest(*s3.PutBucketAclInput) s3.PutBucketAclRequest + + PutBucketAnalyticsConfigurationRequest(*s3.PutBucketAnalyticsConfigurationInput) s3.PutBucketAnalyticsConfigurationRequest + + PutBucketCorsRequest(*s3.PutBucketCorsInput) s3.PutBucketCorsRequest + + PutBucketEncryptionRequest(*s3.PutBucketEncryptionInput) s3.PutBucketEncryptionRequest + + PutBucketInventoryConfigurationRequest(*s3.PutBucketInventoryConfigurationInput) s3.PutBucketInventoryConfigurationRequest + + PutBucketLifecycleRequest(*s3.PutBucketLifecycleInput) s3.PutBucketLifecycleRequest + + PutBucketLifecycleConfigurationRequest(*s3.PutBucketLifecycleConfigurationInput) s3.PutBucketLifecycleConfigurationRequest + + 
PutBucketLoggingRequest(*s3.PutBucketLoggingInput) s3.PutBucketLoggingRequest + + PutBucketMetricsConfigurationRequest(*s3.PutBucketMetricsConfigurationInput) s3.PutBucketMetricsConfigurationRequest + + PutBucketNotificationRequest(*s3.PutBucketNotificationInput) s3.PutBucketNotificationRequest + + PutBucketNotificationConfigurationRequest(*s3.PutBucketNotificationConfigurationInput) s3.PutBucketNotificationConfigurationRequest + + PutBucketPolicyRequest(*s3.PutBucketPolicyInput) s3.PutBucketPolicyRequest + + PutBucketReplicationRequest(*s3.PutBucketReplicationInput) s3.PutBucketReplicationRequest + + PutBucketRequestPaymentRequest(*s3.PutBucketRequestPaymentInput) s3.PutBucketRequestPaymentRequest + + PutBucketTaggingRequest(*s3.PutBucketTaggingInput) s3.PutBucketTaggingRequest + + PutBucketVersioningRequest(*s3.PutBucketVersioningInput) s3.PutBucketVersioningRequest + + PutBucketWebsiteRequest(*s3.PutBucketWebsiteInput) s3.PutBucketWebsiteRequest + + PutObjectRequest(*s3.PutObjectInput) s3.PutObjectRequest + + PutObjectAclRequest(*s3.PutObjectAclInput) s3.PutObjectAclRequest + + PutObjectLegalHoldRequest(*s3.PutObjectLegalHoldInput) s3.PutObjectLegalHoldRequest + + PutObjectLockConfigurationRequest(*s3.PutObjectLockConfigurationInput) s3.PutObjectLockConfigurationRequest + + PutObjectRetentionRequest(*s3.PutObjectRetentionInput) s3.PutObjectRetentionRequest + + PutObjectTaggingRequest(*s3.PutObjectTaggingInput) s3.PutObjectTaggingRequest + + PutPublicAccessBlockRequest(*s3.PutPublicAccessBlockInput) s3.PutPublicAccessBlockRequest + + RestoreObjectRequest(*s3.RestoreObjectInput) s3.RestoreObjectRequest + + UploadPartRequest(*s3.UploadPartInput) s3.UploadPartRequest + + UploadPartCopyRequest(*s3.UploadPartCopyInput) s3.UploadPartCopyRequest + + WaitUntilBucketExists(context.Context, *s3.HeadBucketInput, ...aws.WaiterOption) error + + WaitUntilBucketNotExists(context.Context, *s3.HeadBucketInput, ...aws.WaiterOption) error + + WaitUntilObjectExists(context.Context, *s3.HeadObjectInput, ...aws.WaiterOption) error + + WaitUntilObjectNotExists(context.Context, *s3.HeadObjectInput, ...aws.WaiterOption) error +} + +var _ ClientAPI = (*s3.Client)(nil) diff --git a/vendor/vendor.json b/vendor/vendor.json index 15fcc818ce2..ac34c3e30c5 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -512,7 +512,15 @@ "versionExact": "v0.9.0" }, { - "checksumSHA1": "VOdJIdzSIvHiu23lwsxFOTeBrWk=", + "checksumSHA1": "e0qV+W56A3/QgA1xDkaAwTIffAw=", + "path": "github.com/aws/aws-sdk-go-v2/service/s3/s3iface", + "revision": "098e15df3044cf1b04a222c1c33c3e6135ac89f3", + "revisionTime": "2019-05-28T21:51:27Z", + "version": "v0.9.0", + "versionExact": "v0.9.0" + }, + { + "checksumSHA1": "Y+mpKnu57hYbC4z3nrSFR9BkAec=", "path": "github.com/aws/aws-sdk-go-v2/service/sqs", "revision": "098e15df3044cf1b04a222c1c33c3e6135ac89f3", "revisionTime": "2019-05-28T21:51:27Z", diff --git a/x-pack/filebeat/filebeat.yml b/x-pack/filebeat/filebeat.yml index 5cfa273c303..f266074aa1c 100644 --- a/x-pack/filebeat/filebeat.yml +++ b/x-pack/filebeat/filebeat.yml @@ -13,19 +13,23 @@ #=========================== Filebeat inputs ============================= filebeat.inputs: - +- type: s3 + queueURLs: ["https://sqs.ap-southeast-1.amazonaws.com/627959692251/test-s3-notification"] + access_key_id: '${AWS_ACCESS_KEY_ID:""}' + secret_access_key: '${AWS_SECRET_ACCESS_KEY:""}' + session_token: '${AWS_SESSION_TOKEN:"”}' # Each - is an input. 
Most options can be set at the input level, so # you can use different inputs for various configurations. # Below are the input specific configurations. -- type: log +#- type: log # Change to true to enable this input configuration. - enabled: false + #enabled: false # Paths that should be crawled and fetched. Glob based paths. - paths: - - /var/log/*.log + #paths: + # - /var/log/*.log #- c:\programdata\elasticsearch\logs\* # Exclude lines. A list of regular expressions to match. It drops the lines that are diff --git a/x-pack/filebeat/include/list.go b/x-pack/filebeat/include/list.go index d6787809a1b..9d8adc45b1e 100644 --- a/x-pack/filebeat/include/list.go +++ b/x-pack/filebeat/include/list.go @@ -10,6 +10,7 @@ import ( // Import packages that need to register themselves. _ "github.com/elastic/beats/x-pack/filebeat/input/googlepubsub" _ "github.com/elastic/beats/x-pack/filebeat/input/netflow" + _ "github.com/elastic/beats/x-pack/filebeat/input/s3" _ "github.com/elastic/beats/x-pack/filebeat/module/cisco" _ "github.com/elastic/beats/x-pack/filebeat/module/coredns" _ "github.com/elastic/beats/x-pack/filebeat/module/envoyproxy" diff --git a/x-pack/filebeat/input/s3/_meta/fields.yml b/x-pack/filebeat/input/s3/_meta/fields.yml new file mode 100644 index 00000000000..f41e2222d81 --- /dev/null +++ b/x-pack/filebeat/input/s3/_meta/fields.yml @@ -0,0 +1,19 @@ +- key: s3 + title: "s3" + description: > + Fields from s3 input. + release: beta + fields: + - name: log.source + type: group + description: > + Source of the log message. + fields: + - name: bucketName + type: keyword + description: > + Name of the S3 bucket that this log retrieved from. + - name: objectKey + type: keyword + description: > + Name of the S3 object that this log retrieved from. diff --git a/x-pack/filebeat/input/s3/config.go b/x-pack/filebeat/input/s3/config.go new file mode 100644 index 00000000000..83ff62f6388 --- /dev/null +++ b/x-pack/filebeat/input/s3/config.go @@ -0,0 +1,21 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package s3 + +import "github.com/elastic/beats/filebeat/harvester" + +var defaultConfig = config{ + ForwarderConfig: harvester.ForwarderConfig{ + Type: "s3", + }, +} + +type config struct { + harvester.ForwarderConfig `config:",inline"` + QueueURLs []string `config:"queueURLs"` + AccessKeyID string `config:"access_key_id" validate:"nonzero,required"` + SecretAccessKey string `config:"secret_access_key" validate:"nonzero,required"` + SessionToken string `config:"session_token"` +} diff --git a/x-pack/filebeat/input/s3/fields.go b/x-pack/filebeat/input/s3/fields.go new file mode 100644 index 00000000000..ade84fea427 --- /dev/null +++ b/x-pack/filebeat/input/s3/fields.go @@ -0,0 +1,23 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +// Code generated by beats/dev-tools/cmd/asset/asset.go - DO NOT EDIT. + +package s3 + +import ( + "github.com/elastic/beats/libbeat/asset" +) + +func init() { + if err := asset.SetFields("filebeat", "s3", asset.ModuleFieldsPri, AssetS3); err != nil { + panic(err) + } +} + +// AssetS3 returns asset data. +// This is the base64 encoded gzipped contents of input/s3. 
+func AssetS3() string { + return "eJyskL1OxDAQhHs/xej6S5POBSUNEs09gZNMciY/jrwbkN8e2SQoEiCa28LFeGe+0V4xMllIbQD1OtHiIvXFAB2ljX5VHxaLJwMAz55TJ+hjmCE1/LJuWhkgcqITWjRUZ4C+7NniuWJxMy2mMFQSttiyyICmlRZDDNu6K78Q89yKC6GH3plzMFPEDaz2lTPujGy2dqS+upnfXwd2ZPoIsTvpf8Dz5IQDf6v3WOjd5cdLqRSp0fOdXblO9aNLaN7Y6gvTQ6t8pf5T5TMAAP//4lKLDw==" +} diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go new file mode 100644 index 00000000000..35da7d2e438 --- /dev/null +++ b/x-pack/filebeat/input/s3/input.go @@ -0,0 +1,324 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package s3 + +import ( + "bytes" + "encoding/json" + "strings" + "sync" + "time" + + awssdk "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/aws/defaults" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/s3iface" + "github.com/aws/aws-sdk-go-v2/service/sqs" + "github.com/pkg/errors" + + "github.com/elastic/beats/filebeat/channel" + "github.com/elastic/beats/filebeat/harvester" + "github.com/elastic/beats/filebeat/input" + "github.com/elastic/beats/filebeat/util" + "github.com/elastic/beats/libbeat/beat" + "github.com/elastic/beats/libbeat/common" + "github.com/elastic/beats/libbeat/common/cfgwarn" + "github.com/elastic/beats/libbeat/logp" +) + +var ( + // input name + inputName = "s3" + // MaxNumberOfMessage at one poll + MaxNumberOfMessage int64 = 10 + // WaitTimeSecond for each poll + WaitTimeSecond int64 = 20 +) + +type s3Info struct { + name string + key string +} + +func init() { + err := input.Register(inputName, NewInput) + if err != nil { + panic(err) + } +} + +// Input is a input for s3 +type Input struct { + started bool + outlet channel.Outleter + config config + cfg *common.Config + registry *harvester.Registry + logger *logp.Logger +} + +// NewInput creates a new s3 input +func NewInput(cfg *common.Config, outletFactory channel.Connector, context input.Context) (input.Input, error) { + cfgwarn.Beta("s3 input type is used") + + logger := logp.NewLogger(inputName) + + config := defaultConfig + if err := cfg.Unpack(&config); err != nil { + return nil, errors.Wrap(err, "failed unpacking config") + } + + awsConfig := defaults.Config() + awsCredentials := awssdk.Credentials{ + AccessKeyID: config.AccessKeyID, + SecretAccessKey: config.SecretAccessKey, + } + + if config.SessionToken != "" { + awsCredentials.SessionToken = config.SessionToken + } + + awsConfig.Credentials = awssdk.StaticCredentialsProvider{ + Value: awsCredentials, + } + + outlet, err := outletFactory(cfg, context.DynamicFields) + if err != nil { + return nil, err + } + + p := &Input{ + started: false, + outlet: outlet, + cfg: cfg, + config: config, + logger: logger, + registry: harvester.NewRegistry(), + } + + return p, nil +} + +// Run runs the input +func (p *Input) Run() { + p.logger.Debugf("s3", "Run s3 input with queueURLs: %+v", p.config.QueueURLs) + if len(p.config.QueueURLs) == 0 { + p.logger.Error("No sqs queueURLs configured") + return + } + + awsConfig := defaults.Config() + awsCredentials := awssdk.Credentials{ + AccessKeyID: p.config.AccessKeyID, + SecretAccessKey: p.config.SecretAccessKey, + } + if p.config.SessionToken != "" { + awsCredentials.SessionToken = p.config.SessionToken + } + + awsConfig.Credentials = awssdk.StaticCredentialsProvider{ + Value: awsCredentials, + } + + forwarder := 
harvester.NewForwarder(p.outlet) + for _, queueURL := range p.config.QueueURLs { + var d *util.Data + regionName, err := getRegionFromQueueURL(queueURL) + if err != nil { + p.logger.Errorf("failed to get region name from queueURL: %s", queueURL) + continue + } + + awsConfig.Region = regionName + svcSQS := sqs.New(awsConfig) + svcS3 := s3.New(awsConfig) + + // RECEIVE + receiveMessageInput := &sqs.ReceiveMessageInput{ + QueueUrl: &queueURL, + MessageAttributeNames: []string{"All"}, + MaxNumberOfMessages: &MaxNumberOfMessage, + VisibilityTimeout: awssdk.Int64(20), // 20 seconds + WaitTimeSeconds: &WaitTimeSecond, + } + + req := svcSQS.ReceiveMessageRequest(receiveMessageInput) + output, errR := req.Send() + if errR != nil { + return + } + + if len(output.Messages) > 0 { + events, messagesReceiptHandles, err := p.receiveMessages(queueURL, output.Messages, svcS3, svcSQS) + if err != nil { + p.logger.Error(errors.Wrap(err, "receiveMessages failed")) + } + + for _, event := range events { + d = &util.Data{Event: *event} + err = forwarder.Send(d) + if err != nil { + p.logger.Error(errors.Wrap(err, "forwarder send failed")) + } + } + + // TODO: When log message collection takes longer than 30s(default filebeat freq?), + // sqs messages got read twice or more because it didn't get deleted fast enough. + // delete message after events are sent + err = deleteMessages(queueURL, messagesReceiptHandles, svcSQS) + if err != nil { + p.logger.Error(errors.Wrap(err, "deleteMessages failed")) + } + } + } +} + +// Stop stops the input and all its harvesters +func (p *Input) Stop() { + p.registry.Stop() + p.outlet.Close() +} + +// Wait stops the s3 input. +func (p *Input) Wait() { + p.Stop() +} + +func getRegionFromQueueURL(queueURL string) (string, error) { + // get region from queueURL + // Example: https://sqs.us-east-1.amazonaws.com/627959692251/test-s3-logs + queueURLSplit := strings.Split(queueURL, ".") + if queueURLSplit[0] == "https://sqs" && queueURLSplit[2] == "amazonaws" { + return queueURLSplit[1], nil + } + return "", errors.New("queueURL is not in format: https://sqs.{REGION_ENDPOINT}.amazonaws.com/{ACCOUNT_NUMBER}/{QUEUE_NAME}") +} + +// launches goroutine per received message and wait for all message to be processed +func (p *Input) receiveMessages(queueURL string, messages []sqs.Message, svcS3 s3iface.S3API, svcSQS *sqs.SQS) ([]*beat.Event, []string, error) { + var eventsTotal []*beat.Event + var messagesReceiptHandles []string + var wg sync.WaitGroup + + // TODO: Check goroutine cleanup + numMessages := len(messages) + wg.Add(numMessages) + for i := range messages { + go func(m sqs.Message) { + // launch goroutine to handle each message + defer wg.Done() + + s3Infos, err := handleMessage(m) + if err != nil { + p.logger.Error(err.Error()) + } + + if err != nil { + p.logger.Error(err.Error()) + } + + // read from s3 + events, err := readS3Object(svcS3, s3Infos) + if err != nil { + p.logger.Error(err.Error()) + } + + eventsTotal = append(eventsTotal, events...) 
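+			// NOTE: eventsTotal and messagesReceiptHandles are shared across all the
+			// per-message goroutines; appending to them here without a mutex or a
+			// channel is a data race (`go test -race` would flag it).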
+ messagesReceiptHandles = append(messagesReceiptHandles, *m.ReceiptHandle) + }(messages[i]) + } + + wg.Wait() + return eventsTotal, messagesReceiptHandles, nil +} + +// handle message +func handleMessage(m sqs.Message) (s3Infos []s3Info, err error) { + msg := map[string]interface{}{} + err = json.Unmarshal([]byte(*m.Body), &msg) + if err != nil { + err = errors.Wrap(err, "json unmarshal sqs message body failed") + return + } + + records := msg["Records"].([]interface{}) + s3Info := s3Info{} + for _, record := range records { + recordMap := record.(map[string]interface{}) + if recordMap["eventSource"] == "aws:s3" && recordMap["eventName"] == "ObjectCreated:Put" { + s3Record := recordMap["s3"].(map[string]interface{}) + bucketInfo := s3Record["bucket"].(map[string]interface{}) + objectInfo := s3Record["object"].(map[string]interface{}) + s3Info.name = bucketInfo["name"].(string) + s3Info.key = objectInfo["key"].(string) + s3Infos = append(s3Infos, s3Info) + } + } + return +} + +func readS3Object(svc s3iface.S3API, s3Infos []s3Info) ([]*beat.Event, error) { + var events []*beat.Event + for _, s3Info := range s3Infos { + s3GetObjectInput := &s3.GetObjectInput{ + Bucket: awssdk.String(s3Info.name), + Key: awssdk.String(s3Info.key), + } + objReq := svc.GetObjectRequest(s3GetObjectInput) + + objResp, err := objReq.Send() + if err != nil { + return nil, errors.Wrap(err, "s3 get object request failed") + } + + // TODO: check way to stream + buf := new(bytes.Buffer) + _, err = buf.ReadFrom(objResp.Body) + if err != nil { + return nil, errors.Wrap(err, "buf.ReadFrom failed") + } + + s := buf.String() // Does a complete copy of the bytes in the buffer. + logLines := strings.Split(s, "\n") + for _, log := range logLines { + // create event + event := createEvent(log, s3Info) + events = append(events, event) + } + } + return events, nil +} + +func deleteMessages(queueURL string, messagesReceiptHandles []string, svcSQS *sqs.SQS) error { + for _, receiptHandle := range messagesReceiptHandles { + deleteMessageInput := &sqs.DeleteMessageInput{ + QueueUrl: awssdk.String(queueURL), + ReceiptHandle: awssdk.String(receiptHandle), + } + + req := svcSQS.DeleteMessageRequest(deleteMessageInput) + _, err := req.Send() + if err != nil { + return errors.Wrap(err, "DeleteMessageRequest failed") + } + } + return nil +} + +func createEvent(log string, s3Info s3Info) *beat.Event { + f := common.MapStr{ + "message": log, + "log": common.MapStr{ + "source": common.MapStr{ + "bucketName": s3Info.name, + "objectKey": s3Info.key, + }, + }, + } + return &beat.Event{ + Timestamp: time.Now(), + Fields: f, + } +} diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go new file mode 100644 index 00000000000..e226dd51870 --- /dev/null +++ b/x-pack/filebeat/input/s3/input_test.go @@ -0,0 +1,107 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package s3 + +import ( + "bytes" + "io/ioutil" + "testing" + + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/s3iface" + + awssdk "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/sqs" + "github.com/stretchr/testify/assert" +) + +// MockS3Client struct is used for unit tests. 
+type MockS3Client struct { + s3iface.S3API +} + +var s3LogString = "36c1f05b76016b78528454e6e0c60e2b7ff7aa20c0a5e4c748276e5b0a2debd2 test-s3-ks [20/Jun/2019:04:07:48 +0000] 97.118.27.161 arn:aws:iam::627959692251:user/kaiyan.sheng@elastic.co 5141F2225A070122 REST.HEAD.OBJECT Screen%2BShot%2B2019-02-21%2Bat%2B2.15.50%2BPM.png" + +func (m *MockS3Client) GetObjectRequest(input *s3.GetObjectInput) s3.GetObjectRequest { + logBody := ioutil.NopCloser(bytes.NewReader([]byte(s3LogString))) + return s3.GetObjectRequest{ + Request: &awssdk.Request{ + Data: &s3.GetObjectOutput{ + Body: logBody, + }, + }, + } +} + +func TestGetRegionFromQueueURL(t *testing.T) { + queueURL := "https://sqs.us-east-1.amazonaws.com/627959692251/test-s3-logs" + regionName, err := getRegionFromQueueURL(queueURL) + assert.NoError(t, err) + assert.Equal(t, "us-east-1", regionName) +} + +func TestHandleMessage(t *testing.T) { + cases := []struct { + title string + message sqs.Message + expectedS3Infos []s3Info + }{ + { + "sqs message with event source aws:s3 and event name ObjectCreated:Put", + sqs.Message{ + Body: awssdk.String("{\"Records\":[{\"eventSource\":\"aws:s3\",\"awsRegion\":\"ap-southeast-1\",\"eventTime\":\"2019-06-21T16:16:54.629Z\",\"eventName\":\"ObjectCreated:Put\",\"s3\":{\"configurationId\":\"object-created-event\",\"bucket\":{\"name\":\"test-s3-ks-2\",\"arn\":\"arn:aws:s3:::test-s3-ks-2\"},\"object\":{\"key\":\"server-access-logging2019-06-21-16-16-54-E68E4316CEB285AA\"}}}]}"), + }, + []s3Info{ + { + name: "test-s3-ks-2", + key: "server-access-logging2019-06-21-16-16-54-E68E4316CEB285AA", + }, + }, + }, + { + "sqs message with event source aws:s3 and event name ObjectCreated:Delete", + sqs.Message{ + Body: awssdk.String("{\"Records\":[{\"eventSource\":\"aws:s3\",\"awsRegion\":\"ap-southeast-1\",\"eventTime\":\"2019-06-21T16:16:54.629Z\",\"eventName\":\"ObjectCreated:Delete\",\"s3\":{\"configurationId\":\"object-created-event\",\"bucket\":{\"name\":\"test-s3-ks-2\",\"arn\":\"arn:aws:s3:::test-s3-ks-2\"},\"object\":{\"key\":\"server-access-logging2019-06-21-16-16-54-E68E4316CEB285AA\"}}}]}"), + }, + []s3Info{}, + }, + { + "sqs message with event source aws:ec2 and event name ObjectCreated:Put", + sqs.Message{ + Body: awssdk.String("{\"Records\":[{\"eventSource\":\"aws:ec2\",\"awsRegion\":\"ap-southeast-1\",\"eventTime\":\"2019-06-21T16:16:54.629Z\",\"eventName\":\"ObjectCreated:Put\",\"s3\":{\"configurationId\":\"object-created-event\",\"bucket\":{\"name\":\"test-s3-ks-2\",\"arn\":\"arn:aws:s3:::test-s3-ks-2\"},\"object\":{\"key\":\"server-access-logging2019-06-21-16-16-54-E68E4316CEB285AA\"}}}]}"), + }, + []s3Info{}, + }, + } + for _, c := range cases { + s3Info, err := handleMessage(c.message) + assert.NoError(t, err) + assert.Equal(t, len(c.expectedS3Infos), len(s3Info)) + if len(s3Info) > 0 { + assert.Equal(t, c.expectedS3Infos[0].key, s3Info[0].key) + assert.Equal(t, c.expectedS3Infos[0].name, s3Info[0].name) + } + } +} + +func TestReadS3Object(t *testing.T) { + mockSvc := &MockS3Client{} + s3Info := []s3Info{ + { + name: "test-s3-ks-2", + key: "log2019-06-21-16-16-54", + }, + } + events, err := readS3Object(mockSvc, s3Info) + assert.NoError(t, err) + assert.Equal(t, 1, len(events)) + bucketName, err := events[0].Fields.GetValue("log.source.bucketName") + objectKey, err := events[0].Fields.GetValue("log.source.objectKey") + message, err := events[0].Fields.GetValue("message") + assert.NoError(t, err) + assert.Equal(t, "test-s3-ks-2", bucketName.(string)) + assert.Equal(t, "log2019-06-21-16-16-54", 
objectKey.(string)) + assert.Equal(t, s3LogString, message.(string)) +} From a98e05c8431e36f0737d1dcddab629f485560628 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Fri, 21 Jun 2019 14:00:45 -0600 Subject: [PATCH 02/59] run mage fmt update --- x-pack/filebeat/filebeat.yml | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/x-pack/filebeat/filebeat.yml b/x-pack/filebeat/filebeat.yml index f266074aa1c..5cfa273c303 100644 --- a/x-pack/filebeat/filebeat.yml +++ b/x-pack/filebeat/filebeat.yml @@ -13,23 +13,19 @@ #=========================== Filebeat inputs ============================= filebeat.inputs: -- type: s3 - queueURLs: ["https://sqs.ap-southeast-1.amazonaws.com/627959692251/test-s3-notification"] - access_key_id: '${AWS_ACCESS_KEY_ID:""}' - secret_access_key: '${AWS_SECRET_ACCESS_KEY:""}' - session_token: '${AWS_SESSION_TOKEN:"”}' + # Each - is an input. Most options can be set at the input level, so # you can use different inputs for various configurations. # Below are the input specific configurations. -#- type: log +- type: log # Change to true to enable this input configuration. - #enabled: false + enabled: false # Paths that should be crawled and fetched. Glob based paths. - #paths: - # - /var/log/*.log + paths: + - /var/log/*.log #- c:\programdata\elasticsearch\logs\* # Exclude lines. A list of regular expressions to match. It drops the lines that are From 9b11cb0a2535c332693704851e96ac8ef2230cfb Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 24 Jun 2019 16:20:40 -0600 Subject: [PATCH 03/59] update doc --- filebeat/docs/fields.asciidoc | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/filebeat/docs/fields.asciidoc b/filebeat/docs/fields.asciidoc index 8a05fb97689..629c6e4b9d5 100644 --- a/filebeat/docs/fields.asciidoc +++ b/filebeat/docs/fields.asciidoc @@ -48,6 +48,7 @@ grouped in the following categories: * <> * <> * <> +* <> * <> * <> * <> @@ -12840,6 +12841,40 @@ The arguments with which the command was called. type: keyword +-- + +[[exported-fields-s3]] +== s3 fields + +Fields from s3 input. + + + +[float] +== log.source fields + +Source of the log message. + + + +*`log.source.bucket_name`*:: ++ +-- +type: keyword + +Name of the S3 bucket that this log retrieved from. + + +-- + +*`log.source.object_key`*:: ++ +-- +type: keyword + +Name of the S3 object that this log retrieved from. + + -- [[exported-fields-santa]] From 9d5a6243408ad6c87a6f143a7955568494b989cc Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 24 Jun 2019 16:30:53 -0600 Subject: [PATCH 04/59] Change bucketName to bucket_name --- x-pack/filebeat/input/s3/_meta/fields.yml | 4 ++-- x-pack/filebeat/input/s3/fields.go | 2 +- x-pack/filebeat/input/s3/input.go | 7 +++++-- x-pack/filebeat/input/s3/input_test.go | 4 ++-- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/x-pack/filebeat/input/s3/_meta/fields.yml b/x-pack/filebeat/input/s3/_meta/fields.yml index f41e2222d81..231dafc13c3 100644 --- a/x-pack/filebeat/input/s3/_meta/fields.yml +++ b/x-pack/filebeat/input/s3/_meta/fields.yml @@ -9,11 +9,11 @@ description: > Source of the log message. fields: - - name: bucketName + - name: bucket_name type: keyword description: > Name of the S3 bucket that this log retrieved from. - - name: objectKey + - name: object_key type: keyword description: > Name of the S3 object that this log retrieved from. 
diff --git a/x-pack/filebeat/input/s3/fields.go b/x-pack/filebeat/input/s3/fields.go index ade84fea427..3236d9ba202 100644 --- a/x-pack/filebeat/input/s3/fields.go +++ b/x-pack/filebeat/input/s3/fields.go @@ -19,5 +19,5 @@ func init() { // AssetS3 returns asset data. // This is the base64 encoded gzipped contents of input/s3. func AssetS3() string { - return "eJyskL1OxDAQhHs/xej6S5POBSUNEs09gZNMciY/jrwbkN8e2SQoEiCa28LFeGe+0V4xMllIbQD1OtHiIvXFAB2ljX5VHxaLJwMAz55TJ+hjmCE1/LJuWhkgcqITWjRUZ4C+7NniuWJxMy2mMFQSttiyyICmlRZDDNu6K78Q89yKC6GH3plzMFPEDaz2lTPujGy2dqS+upnfXwd2ZPoIsTvpf8Dz5IQDf6v3WOjd5cdLqRSp0fOdXblO9aNLaN7Y6gvTQ6t8pf5T5TMAAP//4lKLDw==" + return "eJyskL1OxDAQhHs/xej6S5POBSUlzT3AyUkmOZMfR94NyG+PbEIUCRANW1jyeGe+ka8YmSykNoB6nWhxkfpigI7SRr+qD4vFkwGAZ8+pE/QxzJAaflk3rQwQOdEJLRqqM0Bf9mzxXLG4mRZTGCoJW2xZZEDTSoshhm3dlR+IeW7FhdBDH8w5mCniBlb7yhl3RjZbO1Lv+XK8fXFHpvcQu5P+Cz3Pi5sP/q3ec6EPlw8vpVOkRs83duV7qm9lQvPKVu8j0792+Yz9o8tHAAAA//9IDIwN" } diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 35da7d2e438..1fb4af3bf69 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -284,6 +284,9 @@ func readS3Object(svc s3iface.S3API, s3Infos []s3Info) ([]*beat.Event, error) { logLines := strings.Split(s, "\n") for _, log := range logLines { // create event + if log == "" { + continue + } event := createEvent(log, s3Info) events = append(events, event) } @@ -312,8 +315,8 @@ func createEvent(log string, s3Info s3Info) *beat.Event { "message": log, "log": common.MapStr{ "source": common.MapStr{ - "bucketName": s3Info.name, - "objectKey": s3Info.key, + "bucket_name": s3Info.name, + "object_key": s3Info.key, }, }, } diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index e226dd51870..7c5fb56fa32 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -97,8 +97,8 @@ func TestReadS3Object(t *testing.T) { events, err := readS3Object(mockSvc, s3Info) assert.NoError(t, err) assert.Equal(t, 1, len(events)) - bucketName, err := events[0].Fields.GetValue("log.source.bucketName") - objectKey, err := events[0].Fields.GetValue("log.source.objectKey") + bucketName, err := events[0].Fields.GetValue("log.source.bucket_name") + objectKey, err := events[0].Fields.GetValue("log.source.object_key") message, err := events[0].Fields.GetValue("message") assert.NoError(t, err) assert.Equal(t, "test-s3-ks-2", bucketName.(string)) From 252e2d78b6b478de920e6534577e2230b9a5fa0a Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 24 Jun 2019 21:14:21 -0600 Subject: [PATCH 05/59] Run make update --- filebeat/docs/fields.asciidoc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/filebeat/docs/fields.asciidoc b/filebeat/docs/fields.asciidoc index 629c6e4b9d5..4aff9d4b807 100644 --- a/filebeat/docs/fields.asciidoc +++ b/filebeat/docs/fields.asciidoc @@ -12851,7 +12851,7 @@ Fields from s3 input. [float] -== log.source fields +=== log.source Source of the log message. @@ -12860,21 +12860,21 @@ Source of the log message. *`log.source.bucket_name`*:: + -- -type: keyword - Name of the S3 bucket that this log retrieved from. +type: keyword + -- *`log.source.object_key`*:: + -- -type: keyword - Name of the S3 object that this log retrieved from. 
+type: keyword + -- [[exported-fields-santa]] From cf23865ce5c0aecadd4b6e9b986d0089edf1d91f Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Tue, 25 Jun 2019 18:13:40 -0600 Subject: [PATCH 06/59] remove receiveMessages function and put goroutines directly in Run --- filebeat/docs/fields.asciidoc | 126 ++++++--- .../_meta/common.reference.inputs.yml | 17 ++ x-pack/filebeat/filebeat.reference.yml | 17 ++ x-pack/filebeat/input/s3/_meta/fields.yml | 40 ++- x-pack/filebeat/input/s3/config.go | 3 +- x-pack/filebeat/input/s3/fields.go | 2 +- x-pack/filebeat/input/s3/input.go | 255 +++++++++--------- x-pack/filebeat/input/s3/input_test.go | 99 ++++++- 8 files changed, 382 insertions(+), 177 deletions(-) diff --git a/filebeat/docs/fields.asciidoc b/filebeat/docs/fields.asciidoc index 4aff9d4b807..8ab90897b05 100644 --- a/filebeat/docs/fields.asciidoc +++ b/filebeat/docs/fields.asciidoc @@ -14,9 +14,11 @@ grouped in the following categories: * <> * <> +* <> * <> * <> * <> +* <> * <> * <> * <> @@ -34,6 +36,7 @@ grouped in the following categories: * <> * <> * <> +* <> * <> * <> * <> @@ -48,7 +51,6 @@ grouped in the following categories: * <> * <> * <> -* <> * <> * <> * <> @@ -987,6 +989,40 @@ alias to: destination.address -- +[[exported-fields-aws]] +== aws fields + +Aws fields from s3 input. + + + +[float] +=== s3 + +S3 related information. + + + +*`s3.bucket_name`*:: ++ +-- +Name of the S3 bucket that this log retrieved from. + + +type: keyword + +-- + +*`s3.object_key`*:: ++ +-- +Name of the S3 object that this log retrieved from. + + +type: keyword + +-- + [[exported-fields-beat-common]] == Beat fields @@ -1368,6 +1404,33 @@ alias to: cloud.region -- +[[exported-fields-cloud]] +== cloud fields + +Cloud fields from s3 input. + + + +*`provider`*:: ++ +-- +Name of the cloud provider for S3. + + +type: keyword + +-- + +*`region`*:: ++ +-- +Region name of the S3 bucket. + + +type: keyword + +-- + [[exported-fields-coredns]] == Coredns fields @@ -7814,6 +7877,33 @@ type: long Name of organization associated with the autonomous system. +type: keyword + +-- + +[[exported-fields-log]] +== log fields + +Log fields from s3 input. + + + +*`offset`*:: ++ +-- +The file offset the reported line starts at. + + +type: long + +-- + +*`file.path`*:: ++ +-- +The objectURL of the file from which the line was read. + + type: keyword -- @@ -12839,40 +12929,6 @@ type: keyword The arguments with which the command was called. -type: keyword - --- - -[[exported-fields-s3]] -== s3 fields - -Fields from s3 input. - - - -[float] -=== log.source - -Source of the log message. - - - -*`log.source.bucket_name`*:: -+ --- -Name of the S3 bucket that this log retrieved from. - - -type: keyword - --- - -*`log.source.object_key`*:: -+ --- -Name of the S3 object that this log retrieved from. - - type: keyword -- diff --git a/x-pack/filebeat/_meta/common.reference.inputs.yml b/x-pack/filebeat/_meta/common.reference.inputs.yml index c096aae7e7b..e72a828ad7c 100644 --- a/x-pack/filebeat/_meta/common.reference.inputs.yml +++ b/x-pack/filebeat/_meta/common.reference.inputs.yml @@ -53,3 +53,20 @@ # Path to a JSON file containing the credentials and key used to subscribe. 
credentials_file: ${path.config}/my-pubsub-subscriber-credentials.json
+
+#------------------------------ S3 input --------------------------------
+# Experimental: Config options for AWS S3 input
+#- type: s3
+  #enabled: false
+
+  # AWS Credentials (required)
+  #access_key_id: '${AWS_ACCESS_KEY_ID:""}'
+  #secret_access_key: '${AWS_SECRET_ACCESS_KEY:""}'
+  #session_token: '${AWS_SESSION_TOKEN:""}'
+  #credential_profile_name: test-s3-input
+
+  # QueueURLs (required) to receive queue messages from
+  #queueURLs: ["https://sqs.us-east-1.amazonaws.com/1234/test-s3-logs-queue"]
+
+  # S3 buckets to collect logs from
+  #bucketNames: ["bucket-test-1", "bucket-test-2"]
diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml
index bf52d6e125d..d323192a853 100644
--- a/x-pack/filebeat/filebeat.reference.yml
+++ b/x-pack/filebeat/filebeat.reference.yml
@@ -972,6 +972,23 @@ filebeat.inputs:
 
   # Path to a JSON file containing the credentials and key used to subscribe.
   credentials_file: ${path.config}/my-pubsub-subscriber-credentials.json
+
+#------------------------------ S3 input --------------------------------
+# Experimental: Config options for AWS S3 input
+#- type: s3
+  #enabled: false
+
+  # AWS Credentials (required)
+  #access_key_id: '${AWS_ACCESS_KEY_ID:""}'
+  #secret_access_key: '${AWS_SECRET_ACCESS_KEY:""}'
+  #session_token: '${AWS_SESSION_TOKEN:""}'
+  #credential_profile_name: test-s3-input
+
+  # QueueURLs (required) to receive queue messages from
+  #queueURLs: ["https://sqs.us-east-1.amazonaws.com/1234/test-s3-logs-queue"]
+
+  # S3 buckets to collect logs from
+  #bucketNames: ["bucket-test-1", "bucket-test-2"]
 
 #========================== Filebeat autodiscover ==============================
 
 # Autodiscover allows you to detect changes in the system and spawn new modules
diff --git a/x-pack/filebeat/input/s3/_meta/fields.yml b/x-pack/filebeat/input/s3/_meta/fields.yml
index 231dafc13c3..8013f6b6073 100644
--- a/x-pack/filebeat/input/s3/_meta/fields.yml
+++ b/x-pack/filebeat/input/s3/_meta/fields.yml
@@ -1,13 +1,28 @@
-- key: s3
-  title: "s3"
+- key: log
+  title: "log"
   description: >
-    Fields from s3 input.
+    Log fields from s3 input.
   release: beta
   fields:
-    - name: log.source
+    - name: offset
+      type: long
+      description: >
+        The file offset the reported line starts at.
+    - name: file.path
+      type: keyword
+      description: >
+        The objectURL of the file from which the line was read.
+
+- key: aws
+  title: "aws"
+  description: >
+    Aws fields from s3 input.
+  release: beta
+  fields:
+    - name: s3
       type: group
       description: >
-        Source of the log message.
+        S3 related information.
       fields:
         - name: bucket_name
           type: keyword
           description: >
             Name of the S3 bucket that this log retrieved from.
         - name: object_key
           type: keyword
           description: >
             Name of the S3 object that this log retrieved from.
+
+- key: cloud
+  title: "cloud"
+  description: >
+    Cloud fields from s3 input.
+  release: beta
+  fields:
+    - name: provider
+      type: keyword
+      description: >
+        Name of the cloud provider for S3.
+    - name: region
+      type: keyword
+      description: >
+        Region name of the S3 bucket.
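For reference, the region that each configured queue URL belongs to is derived by the input itself. Below is a minimal sketch of that parsing, assuming the URL format documented above; it mirrors the getRegionFromQueueURL helper this patch adds, with an extra length check so a malformed URL fails with an error instead of panicking on the slice index:

func getRegionFromQueueURL(queueURL string) (string, error) {
	// Expected format:
	// https://sqs.{REGION_ENDPOINT}.amazonaws.com/{ACCOUNT_NUMBER}/{QUEUE_NAME}
	queueURLSplit := strings.Split(queueURL, ".")
	if len(queueURLSplit) >= 3 && queueURLSplit[0] == "https://sqs" && queueURLSplit[2] == "amazonaws" {
		return queueURLSplit[1], nil
	}
	return "", errors.New("queueURL is not in format: https://sqs.{REGION_ENDPOINT}.amazonaws.com/{ACCOUNT_NUMBER}/{QUEUE_NAME}")
}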
diff --git a/x-pack/filebeat/input/s3/config.go b/x-pack/filebeat/input/s3/config.go
index 83ff62f6388..612cb67ad04 100644
--- a/x-pack/filebeat/input/s3/config.go
+++ b/x-pack/filebeat/input/s3/config.go
@@ -14,7 +14,8 @@ var defaultConfig = config{
 
 type config struct {
 	harvester.ForwarderConfig `config:",inline"`
-	QueueURLs                 []string `config:"queueURLs"`
+	QueueURLs                 []string `config:"queueURLs" validate:"nonzero,required"`
+	BucketNames               []string `config:"bucketNames"`
 	AccessKeyID               string   `config:"access_key_id" validate:"nonzero,required"`
 	SecretAccessKey           string   `config:"secret_access_key" validate:"nonzero,required"`
 	SessionToken              string   `config:"session_token"`
diff --git a/x-pack/filebeat/input/s3/fields.go b/x-pack/filebeat/input/s3/fields.go
index 3236d9ba202..9839b2a4172 100644
--- a/x-pack/filebeat/input/s3/fields.go
+++ b/x-pack/filebeat/input/s3/fields.go
@@ -19,5 +19,5 @@ func init() {
 // AssetS3 returns asset data.
 // This is the base64 encoded gzipped contents of input/s3.
 func AssetS3() string {
-	return "eJyskL1OxDAQhHs/xej6S5POBSUlzT3AyUkmOZMfR94NyG+PbEIUCRANW1jyeGe+ka8YmSykNoB6nWhxkfpigI7SRr+qD4vFkwGAZ8+pE/QxzJAaflk3rQwQOdEJLRqqM0Bf9mzxXLG4mRZTGCoJW2xZZEDTSoshhm3dlR+IeW7FhdBDH8w5mCniBlb7yhl3RjZbO1Lv+XK8fXFHpvcQu5P+Cz3Pi5sP/q3ec6EPlw8vpVOkRs83duV7qm9lQvPKVu8j0792+Yz9o8tHAAAA//9IDIwN"
+	return "eJyskz9v3DAMxXd9igfs8eLNQ4Gia9Ah186BzqJs1bJpUHQMf/tCipP6kH8IrhoMSBT5I9+TbzHQ1iByZwANGqnBTeTuxgCOUith1sBTg28GAO64gw8UXYIXHpFqhGletDKAUCSbqMGZ1Brs95qSd4vJjtSAvU+k5QjQbaaMnrr74A1gXr96gg+R9mxoTxCaWZQcYpgISa1ogi19/KPlpGq22l8AB9pWFvc5k89/qNXf93dgX6CliTL32oe2L2eFv9oEIesqY3ZB7ZoOgto1vSPo9zVdJWiqL2brhJf548lOdS5ss3Zh8iyjzTeqPX5kHDnnpR1IH/LmJfa2oB+g8/ppR3rW81TvdaG9zZ+Q8kuEkEqgR3JFk+pVM0/GPAy0/ddensp+0suzwW3kxR0sLvt3TP6RY1fZPAs/Bkfy9Yd8nLH0+FILngWn+vKXEeoCT1/H3Je8UuSVu5X5GwAA//9i6E0j"
 }
diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go
index 1fb4af3bf69..d1dba58ef51 100644
--- a/x-pack/filebeat/input/s3/input.go
+++ b/x-pack/filebeat/input/s3/input.go
@@ -29,18 +29,25 @@ import (
 )
 
 var (
-	// input name
+	// Filebeat input name
 	inputName = "s3"
-	// MaxNumberOfMessage at one poll
-	MaxNumberOfMessage int64 = 10
-	// WaitTimeSecond for each poll
-	WaitTimeSecond int64 = 20
-)
 
-type s3Info struct {
-	name string
-	key  string
-}
+	// The maximum number of messages to return. Amazon SQS never returns more messages
+	// than this value (however, fewer messages might be returned).
+	maxNumberOfMessage int64 = 10
+
+	// The duration (in seconds) for which the call waits for a message to arrive
+	// in the queue before returning. If a message is available, the call returns
+	// sooner than WaitTimeSeconds. If no messages are available and the wait time
+	// expires, the call returns successfully with an empty list of messages.
+	waitTimeSecond int64 = 10
+
+	// The duration (in seconds) that the received messages are hidden from subsequent
+	// retrieve requests after being retrieved by a ReceiveMessage request.
+	// This value needs to be much bigger than the Filebeat collection frequency so
+	// that if reading the S3 log takes too long, the SQS message is not reprocessed.
+	visibilityTimeout int64 = 300
+)
 
 func init() {
 	err := input.Register(inputName, NewInput)
 	if err != nil {
 		panic(err)
 	}
 }
 
 // Input is a input for s3
 type Input struct {
-	started  bool
-	outlet   channel.Outleter
-	config   config
-	cfg      *common.Config
-	registry *harvester.Registry
-	logger   *logp.Logger
+	started bool
+	outlet  channel.Outleter
+	config  config
+	cfg     *common.Config
+	logger  *logp.Logger
+}
+
+type s3Info struct {
+	name   string
+	key    string
+	region string
 }
 
 // NewInput creates a new s3 input
 func NewInput(cfg *common.Config, outletFactory channel.Connector, context input.Context) (input.Input, error) {
 	cfgwarn.Beta("s3 input type is used")
 
 	logger := logp.NewLogger(inputName)
 
 	config := defaultConfig
 	if err := cfg.Unpack(&config); err != nil {
 		return nil, errors.Wrap(err, "failed unpacking config")
 	}
 
-	awsConfig := defaults.Config()
-	awsCredentials := awssdk.Credentials{
-		AccessKeyID:     config.AccessKeyID,
-		SecretAccessKey: config.SecretAccessKey,
-	}
-
-	if config.SessionToken != "" {
-		awsCredentials.SessionToken = config.SessionToken
-	}
-
-	awsConfig.Credentials = awssdk.StaticCredentialsProvider{
-		Value: awsCredentials,
-	}
-
 	outlet, err := outletFactory(cfg, context.DynamicFields)
 	if err != nil {
 		return nil, err
 	}
 
+	if len(config.QueueURLs) == 0 {
+		return nil, errors.New("no sqs queueURLs configured")
+	}
+
 	p := &Input{
-		started:  false,
-		outlet:   outlet,
-		cfg:      cfg,
-		config:   config,
-		logger:   logger,
-		registry: harvester.NewRegistry(),
+		started: false,
+		outlet:  outlet,
+		cfg:     cfg,
+		config:  config,
+		logger:  logger,
 	}
 
 	return p, nil
@@ -104,10 +105,6 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input
 // Run runs the input
 func (p *Input) Run() {
 	p.logger.Debugf("s3", "Run s3 input with queueURLs: %+v", p.config.QueueURLs)
-	if len(p.config.QueueURLs) == 0 {
-		p.logger.Error("No sqs queueURLs configured")
-		return
-	}
 
 	awsConfig := defaults.Config()
 	awsCredentials := awssdk.Credentials{
@@ -135,13 +132,13 @@ func (p *Input) Run() {
 	svcSQS := sqs.New(awsConfig)
 	svcS3 := s3.New(awsConfig)
 
-	// RECEIVE
+	// receive messages
 	receiveMessageInput := &sqs.ReceiveMessageInput{
 		QueueUrl:              &queueURL,
 		MessageAttributeNames: []string{"All"},
-		MaxNumberOfMessages:   &MaxNumberOfMessage,
-		VisibilityTimeout:     awssdk.Int64(20), // 20 seconds
-		WaitTimeSeconds:       &WaitTimeSecond,
+		MaxNumberOfMessages:   &maxNumberOfMessage,
+		VisibilityTimeout:     &visibilityTimeout,
+		WaitTimeSeconds:       &waitTimeSecond,
 	}
 
 	req := svcSQS.ReceiveMessageRequest(receiveMessageInput)
@@ -150,26 +147,44 @@ func (p *Input) Run() {
 	}
 
+	// process messages
 	if len(output.Messages) > 0 {
-		events, messagesReceiptHandles, err := p.receiveMessages(queueURL, output.Messages, svcS3, svcSQS)
-		if err != nil {
-			p.logger.Error(errors.Wrap(err, "receiveMessages failed"))
-		}
-
-		for _, event := range events {
-			d = &util.Data{Event: *event}
-			err = forwarder.Send(d)
-			if err != nil {
-				p.logger.Error(errors.Wrap(err, "forwarder send failed"))
-			}
-		}
-
-		// TODO: When log message collection takes longer than 30s(default filebeat freq?),
-		// sqs messages got read twice or more because it didn't get deleted fast enough.
- // delete message after events are sent - err = deleteMessages(queueURL, messagesReceiptHandles, svcSQS) - if err != nil { - p.logger.Error(errors.Wrap(err, "deleteMessages failed")) + var wg sync.WaitGroup + numMessages := len(output.Messages) + wg.Add(numMessages) + for i := range output.Messages { + go func(m sqs.Message) { + // launch goroutine to handle each message + defer wg.Done() + + s3Infos, err := handleMessage(m, p.config.BucketNames) + if err != nil { + p.logger.Error(err.Error()) + } + + if err != nil { + p.logger.Error(err.Error()) + } + + // read from s3 + events, err := readS3Object(svcS3, s3Infos) + if err != nil { + p.logger.Error(err.Error()) + } + for _, event := range events { + d = &util.Data{Event: *event} + err = forwarder.Send(d) + if err != nil { + p.logger.Error(errors.Wrap(err, "forwarder send failed")) + } + } + + // delete message after events are sent + err = deleteMessage(queueURL, *m.ReceiptHandle, svcSQS) + if err != nil { + p.logger.Error(errors.Wrap(err, "deleteMessages failed")) + } + }(output.Messages[i]) } } } @@ -177,7 +192,6 @@ func (p *Input) Run() { // Stop stops the input and all its harvesters func (p *Input) Stop() { - p.registry.Stop() p.outlet.Close() } @@ -196,46 +210,8 @@ func getRegionFromQueueURL(queueURL string) (string, error) { return "", errors.New("queueURL is not in format: https://sqs.{REGION_ENDPOINT}.amazonaws.com/{ACCOUNT_NUMBER}/{QUEUE_NAME}") } -// launches goroutine per received message and wait for all message to be processed -func (p *Input) receiveMessages(queueURL string, messages []sqs.Message, svcS3 s3iface.S3API, svcSQS *sqs.SQS) ([]*beat.Event, []string, error) { - var eventsTotal []*beat.Event - var messagesReceiptHandles []string - var wg sync.WaitGroup - - // TODO: Check goroutine cleanup - numMessages := len(messages) - wg.Add(numMessages) - for i := range messages { - go func(m sqs.Message) { - // launch goroutine to handle each message - defer wg.Done() - - s3Infos, err := handleMessage(m) - if err != nil { - p.logger.Error(err.Error()) - } - - if err != nil { - p.logger.Error(err.Error()) - } - - // read from s3 - events, err := readS3Object(svcS3, s3Infos) - if err != nil { - p.logger.Error(err.Error()) - } - - eventsTotal = append(eventsTotal, events...) 
-			messagesReceiptHandles = append(messagesReceiptHandles, *m.ReceiptHandle)
-		}(messages[i])
-	}
-
-	wg.Wait()
-	return eventsTotal, messagesReceiptHandles, nil
-}
-
 // handle message
-func handleMessage(m sqs.Message) (s3Infos []s3Info, err error) {
+func handleMessage(m sqs.Message, bucketNames []string) (s3Infos []s3Info, err error) {
 	msg := map[string]interface{}{}
 	err = json.Unmarshal([]byte(*m.Body), &msg)
 	if err != nil {
@@ -244,14 +220,21 @@ func handleMessage(m sqs.Message) (s3Infos []s3Info, err error) {
 	}
 
 	records := msg["Records"].([]interface{})
-	s3Info := s3Info{}
 	for _, record := range records {
 		recordMap := record.(map[string]interface{})
 		if recordMap["eventSource"] == "aws:s3" && recordMap["eventName"] == "ObjectCreated:Put" {
+			s3Info := s3Info{}
+			if !stringInSlice(recordMap["awsRegion"].(string), bucketNames) {
+				continue
+			}
+
+			s3Info.region = recordMap["awsRegion"].(string)
 			s3Record := recordMap["s3"].(map[string]interface{})
+
 			bucketInfo := s3Record["bucket"].(map[string]interface{})
-			objectInfo := s3Record["object"].(map[string]interface{})
 			s3Info.name = bucketInfo["name"].(string)
+
+			objectInfo := s3Record["object"].(map[string]interface{})
 			s3Info.key = objectInfo["key"].(string)
 			s3Infos = append(s3Infos, s3Info)
 		}
 	}
 	return
 }
 
+// stringInSlice checks if a string already exists in the list.
+// If no bucketNames are configured, then collect from all buckets.
+func stringInSlice(name string, bucketNames []string) bool {
+	if bucketNames == nil || len(bucketNames) == 0 {
+		return true
+	}
+
+	for _, v := range bucketNames {
+		if v == name {
+			return true
+		}
+	}
+	return false
+}
+
 func readS3Object(svc s3iface.S3API, s3Infos []s3Info) ([]*beat.Event, error) {
 	var events []*beat.Event
 	for _, s3Info := range s3Infos {
 		s3GetObjectInput := &s3.GetObjectInput{
 			Bucket: awssdk.String(s3Info.name),
 			Key:    awssdk.String(s3Info.key),
 		}
 		objReq := svc.GetObjectRequest(s3GetObjectInput)
 
 		objResp, err := objReq.Send()
 		if err != nil {
+			// What will happen if this object is not a log file, is not readable, or does not exist?
+			// 2019-06-25T17:21:57.406-0600 ERROR [s3] s3/input.go:220 s3 get object request failed: NoSuchKey: The specified key does not exist.
 			return nil, errors.Wrap(err, "s3 get object request failed")
 		}
 
 		// TODO: check way to stream
 		buf := new(bytes.Buffer)
 		_, err = buf.ReadFrom(objResp.Body)
 		if err != nil {
 			return nil, errors.Wrap(err, "buf.ReadFrom failed")
 		}
 
-		s := buf.String() // Does a complete copy of the bytes in the buffer.
+ s := buf.String() logLines := strings.Split(s, "\n") - for _, log := range logLines { - // create event + for i, log := range logLines { if log == "" { continue } - event := createEvent(log, s3Info) + + // create event per log line + event := createEvent(log, int64(i), s3Info) events = append(events, event) } } return events, nil } -func deleteMessages(queueURL string, messagesReceiptHandles []string, svcSQS *sqs.SQS) error { - for _, receiptHandle := range messagesReceiptHandles { - deleteMessageInput := &sqs.DeleteMessageInput{ - QueueUrl: awssdk.String(queueURL), - ReceiptHandle: awssdk.String(receiptHandle), - } +func deleteMessage(queueURL string, messagesReceiptHandle string, svcSQS *sqs.SQS) error { + deleteMessageInput := &sqs.DeleteMessageInput{ + QueueUrl: awssdk.String(queueURL), + ReceiptHandle: awssdk.String(messagesReceiptHandle), + } - req := svcSQS.DeleteMessageRequest(deleteMessageInput) - _, err := req.Send() - if err != nil { - return errors.Wrap(err, "DeleteMessageRequest failed") - } + req := svcSQS.DeleteMessageRequest(deleteMessageInput) + _, err := req.Send() + if err != nil { + return errors.Wrap(err, "DeleteMessageRequest failed") } return nil } -func createEvent(log string, s3Info s3Info) *beat.Event { +func createEvent(log string, offset int64, s3Info s3Info) *beat.Event { f := common.MapStr{ "message": log, "log": common.MapStr{ - "source": common.MapStr{ + "offset": offset, + "file.path": constructObjectURL(s3Info), + }, + "aws": common.MapStr{ + "s3": common.MapStr{ "bucket_name": s3Info.name, "object_key": s3Info.key, }, }, + "cloud": common.MapStr{ + "provider": "aws", + "region": s3Info.region, + }, } return &beat.Event{ Timestamp: time.Now(), Fields: f, } } + +func constructObjectURL(info s3Info) string { + return "https://" + info.name + ".s3-" + info.region + ".amazonaws.com/" + info.key +} diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index 7c5fb56fa32..799ebceec75 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -22,7 +22,7 @@ type MockS3Client struct { s3iface.S3API } -var s3LogString = "36c1f05b76016b78528454e6e0c60e2b7ff7aa20c0a5e4c748276e5b0a2debd2 test-s3-ks [20/Jun/2019:04:07:48 +0000] 97.118.27.161 arn:aws:iam::627959692251:user/kaiyan.sheng@elastic.co 5141F2225A070122 REST.HEAD.OBJECT Screen%2BShot%2B2019-02-21%2Bat%2B2.15.50%2BPM.png" +var s3LogString = "36c1f test-s3-ks [20/Jun/2019:04:07:48 +0000] 97.118.27.161 arn:aws:iam::627959692251:user/kaiyan.sheng@elastic.co 5141F2225A070122 REST.HEAD.OBJECT Screen%2BShot%2B2019-02-21%2Bat%2B2.15.50%2BPM.png" func (m *MockS3Client) GetObjectRequest(input *s3.GetObjectInput) s3.GetObjectRequest { logBody := ioutil.NopCloser(bytes.NewReader([]byte(s3LogString))) @@ -46,6 +46,7 @@ func TestHandleMessage(t *testing.T) { cases := []struct { title string message sqs.Message + bucketNames []string expectedS3Infos []s3Info }{ { @@ -53,6 +54,7 @@ func TestHandleMessage(t *testing.T) { sqs.Message{ Body: awssdk.String("{\"Records\":[{\"eventSource\":\"aws:s3\",\"awsRegion\":\"ap-southeast-1\",\"eventTime\":\"2019-06-21T16:16:54.629Z\",\"eventName\":\"ObjectCreated:Put\",\"s3\":{\"configurationId\":\"object-created-event\",\"bucket\":{\"name\":\"test-s3-ks-2\",\"arn\":\"arn:aws:s3:::test-s3-ks-2\"},\"object\":{\"key\":\"server-access-logging2019-06-21-16-16-54-E68E4316CEB285AA\"}}}]}"), }, + []string{}, []s3Info{ { name: "test-s3-ks-2", @@ -65,6 +67,7 @@ func TestHandleMessage(t *testing.T) { sqs.Message{ Body: 
awssdk.String("{\"Records\":[{\"eventSource\":\"aws:s3\",\"awsRegion\":\"ap-southeast-1\",\"eventTime\":\"2019-06-21T16:16:54.629Z\",\"eventName\":\"ObjectCreated:Delete\",\"s3\":{\"configurationId\":\"object-created-event\",\"bucket\":{\"name\":\"test-s3-ks-2\",\"arn\":\"arn:aws:s3:::test-s3-ks-2\"},\"object\":{\"key\":\"server-access-logging2019-06-21-16-16-54-E68E4316CEB285AA\"}}}]}"), }, + []string{}, []s3Info{}, }, { @@ -72,17 +75,42 @@ func TestHandleMessage(t *testing.T) { sqs.Message{ Body: awssdk.String("{\"Records\":[{\"eventSource\":\"aws:ec2\",\"awsRegion\":\"ap-southeast-1\",\"eventTime\":\"2019-06-21T16:16:54.629Z\",\"eventName\":\"ObjectCreated:Put\",\"s3\":{\"configurationId\":\"object-created-event\",\"bucket\":{\"name\":\"test-s3-ks-2\",\"arn\":\"arn:aws:s3:::test-s3-ks-2\"},\"object\":{\"key\":\"server-access-logging2019-06-21-16-16-54-E68E4316CEB285AA\"}}}]}"), }, + []string{}, + []s3Info{}, + }, + { + "sqs message with right bucketNames", + sqs.Message{ + Body: awssdk.String("{\"Records\":[{\"eventSource\":\"aws:s3\",\"awsRegion\":\"ap-southeast-1\",\"eventTime\":\"2019-06-21T16:16:54.629Z\",\"eventName\":\"ObjectCreated:Put\",\"s3\":{\"configurationId\":\"object-created-event\",\"bucket\":{\"name\":\"test-s3-ks-2\",\"arn\":\"arn:aws:s3:::test-s3-ks-2\"},\"object\":{\"key\":\"server-access-logging2019-06-21-16-16-54-E68E4316CEB285AA\"}}}]}"), + }, + []string{"ap-southeast-1"}, + []s3Info{ + { + name: "test-s3-ks-2", + key: "server-access-logging2019-06-21-16-16-54-E68E4316CEB285AA", + }, + }, + }, + { + "sqs message with wrong bucketNames", + sqs.Message{ + Body: awssdk.String("{\"Records\":[{\"eventSource\":\"aws:s3\",\"awsRegion\":\"ap-southeast-1\",\"eventTime\":\"2019-06-21T16:16:54.629Z\",\"eventName\":\"ObjectCreated:Put\",\"s3\":{\"configurationId\":\"object-created-event\",\"bucket\":{\"name\":\"test-s3-ks-2\",\"arn\":\"arn:aws:s3:::test-s3-ks-2\"},\"object\":{\"key\":\"server-access-logging2019-06-21-16-16-54-E68E4316CEB285AA\"}}}]}"), + }, + []string{"us-west-1"}, []s3Info{}, }, } + for _, c := range cases { - s3Info, err := handleMessage(c.message) - assert.NoError(t, err) - assert.Equal(t, len(c.expectedS3Infos), len(s3Info)) - if len(s3Info) > 0 { - assert.Equal(t, c.expectedS3Infos[0].key, s3Info[0].key) - assert.Equal(t, c.expectedS3Infos[0].name, s3Info[0].name) - } + t.Run(c.title, func(t *testing.T) { + s3Info, err := handleMessage(c.message, c.bucketNames) + assert.NoError(t, err) + assert.Equal(t, len(c.expectedS3Infos), len(s3Info)) + if len(s3Info) > 0 { + assert.Equal(t, c.expectedS3Infos[0].key, s3Info[0].key) + assert.Equal(t, c.expectedS3Infos[0].name, s3Info[0].name) + } + }) } } @@ -90,18 +118,63 @@ func TestReadS3Object(t *testing.T) { mockSvc := &MockS3Client{} s3Info := []s3Info{ { - name: "test-s3-ks-2", - key: "log2019-06-21-16-16-54", + name: "test-s3-ks-2", + key: "log2019-06-21-16-16-54", + region: "us-west-1", }, } events, err := readS3Object(mockSvc, s3Info) assert.NoError(t, err) assert.Equal(t, 1, len(events)) - bucketName, err := events[0].Fields.GetValue("log.source.bucket_name") - objectKey, err := events[0].Fields.GetValue("log.source.object_key") - message, err := events[0].Fields.GetValue("message") + + bucketName, err := events[0].Fields.GetValue("aws.s3.bucket_name") assert.NoError(t, err) assert.Equal(t, "test-s3-ks-2", bucketName.(string)) + + objectKey, err := events[0].Fields.GetValue("aws.s3.object_key") + assert.NoError(t, err) assert.Equal(t, "log2019-06-21-16-16-54", objectKey.(string)) + + cloudProvider, err := 
events[0].Fields.GetValue("cloud.provider")
+	assert.NoError(t, err)
+	assert.Equal(t, "aws", cloudProvider)
+
+	region, err := events[0].Fields.GetValue("cloud.region")
+	assert.NoError(t, err)
+	assert.Equal(t, "us-west-1", region)
+
+	message, err := events[0].Fields.GetValue("message")
+	assert.NoError(t, err)
 	assert.Equal(t, s3LogString, message.(string))
 }
+
+func TestConstructObjectURL(t *testing.T) {
+	cases := []struct {
+		title             string
+		s3Info            s3Info
+		expectedObjectURL string
+	}{
+		{"construct with object in s3",
+			s3Info{
+				name:   "test-1",
+				key:    "log2019-06-21-16-16-54",
+				region: "us-west-1",
+			},
+			"https://test-1.s3-us-west-1.amazonaws.com/log2019-06-21-16-16-54",
+		},
+		{"construct with object in a folder of s3",
+			s3Info{
+				name:   "test-2",
+				key:    "test-folder-1/test-log-1.txt",
+				region: "us-east-1",
+			},
+			"https://test-2.s3-us-east-1.amazonaws.com/test-folder-1/test-log-1.txt",
+		},
+	}
+	for _, c := range cases {
+		t.Run(c.title, func(t *testing.T) {
+			objectURL := constructObjectURL(c.s3Info)
+			assert.Equal(t, c.expectedObjectURL, objectURL)
+		})
+	}
+}

From b8d017a251690691e18cd1aaf602ed5e07a4acf3 Mon Sep 17 00:00:00 2001
From: kaiyan-sheng
Date: Wed, 26 Jun 2019 15:42:24 -0600
Subject: [PATCH 07/59] Added several methods to read resp.Body to string

---
 x-pack/filebeat/input/s3/input.go      | 99 ++++++++++++++----------
 x-pack/filebeat/input/s3/input_test.go |  9 ++-
 2 files changed, 71 insertions(+), 37 deletions(-)

diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go
index d1dba58ef51..13fdef2d0c5 100644
--- a/x-pack/filebeat/input/s3/input.go
+++ b/x-pack/filebeat/input/s3/input.go
@@ -152,6 +152,7 @@ func (p *Input) Run() {
 			var wg sync.WaitGroup
 			numMessages := len(output.Messages)
 			wg.Add(numMessages)
+
 			for i := range output.Messages {
 				go func(m sqs.Message) {
 					// launch goroutine to handle each message
@@ -162,15 +163,12 @@
 						p.logger.Error(err.Error())
 					}
 
-					if err != nil {
-						p.logger.Error(err.Error())
-					}
-
 					// read from s3
-					events, err := readS3Object(svcS3, s3Infos)
+					events, err := p.readS3Object(svcS3, s3Infos)
 					if err != nil {
 						p.logger.Error(err.Error())
 					}
+
 					for _, event := range events {
 						d = &util.Data{Event: *event}
 						err = forwarder.Send(d)
@@ -257,39 +255,68 @@ func stringInSlice(name string, bucketNames []string) bool {
 	return false
 }
 
-func readS3Object(svc s3iface.S3API, s3Infos []s3Info) ([]*beat.Event, error) {
+func (p *Input) readS3Object(svc s3iface.S3API, s3Infos []s3Info) ([]*beat.Event, error) {
 	var events []*beat.Event
-	for _, s3Info := range s3Infos {
-		s3GetObjectInput := &s3.GetObjectInput{
-			Bucket: awssdk.String(s3Info.name),
-			Key:    awssdk.String(s3Info.key),
-		}
-		objReq := svc.GetObjectRequest(s3GetObjectInput)
-
-		objResp, err := objReq.Send()
-		if err != nil {
-			// What will happen if this object is not a log file or not readable or does not exist?
-			// 2019-06-25T17:21:57.406-0600	ERROR	[s3]	s3/input.go:220	s3 get object request failed: NoSuchKey: The specified key does not exist.
- return nil, errors.Wrap(err, "s3 get object request failed") - } - - // TODO: check way to stream - buf := new(bytes.Buffer) - _, err = buf.ReadFrom(objResp.Body) - if err != nil { - return nil, errors.Wrap(err, "buf.ReadFrom failed") - } - - s := buf.String() - logLines := strings.Split(s, "\n") - for i, log := range logLines { - if log == "" { - continue - } + if len(s3Infos) > 0 { + var wg sync.WaitGroup + numS3Infos := len(s3Infos) + wg.Add(numS3Infos) + + for i := range s3Infos { + go func(s3Info s3Info) { + // launch goroutine to handle each message + defer wg.Done() + + s3GetObjectInput := &s3.GetObjectInput{ + Bucket: awssdk.String(s3Info.name), + Key: awssdk.String(s3Info.key), + } + req := svc.GetObjectRequest(s3GetObjectInput) + + resp, err := req.Send() + if err != nil { + p.logger.Error(errors.Wrap(err, "s3 get object request failed")) + } + + // method1 + buf := new(bytes.Buffer) + _, err = buf.ReadFrom(resp.Body) + if err != nil { + p.logger.Error(errors.Wrap(err, "s3 get object request failed")) + } + + logString := buf.String() + + // method2 + //var logString string + //p := make([]byte, 4) + //for { + // n, err := resp.Body.Read(p) + // if err == io.EOF { + // break + // } + // logString += string(p[:n]) + //} + + // method3 + //outFile, err := os.Create("test") + //defer outFile.Close() + //_, err = io.Copy(outFile, resp.Body) + //fileBytes , err := ioutil.ReadFile("test") + //logString := string(fileBytes) + + logLines := strings.Split(logString, "\n") + for i, log := range logLines { + if log == "" { + continue + } - // create event per log line - event := createEvent(log, int64(i), s3Info) - events = append(events, event) + // create event per log line + event := createEvent(log, int64(i), s3Info) + events = append(events, event) + } + }(s3Infos[i]) + wg.Wait() } } return events, nil diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index 799ebceec75..4f2fc701d34 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -9,6 +9,8 @@ import ( "io/ioutil" "testing" + "github.com/elastic/beats/libbeat/logp" + "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/s3iface" @@ -115,6 +117,11 @@ func TestHandleMessage(t *testing.T) { } func TestReadS3Object(t *testing.T) { + p := &Input{ + started: false, + logger: logp.NewLogger(inputName), + } + mockSvc := &MockS3Client{} s3Info := []s3Info{ { @@ -123,7 +130,7 @@ func TestReadS3Object(t *testing.T) { region: "us-west-1", }, } - events, err := readS3Object(mockSvc, s3Info) + events, err := p.readS3Object(mockSvc, s3Info) assert.NoError(t, err) assert.Equal(t, 1, len(events)) From 6ae5e463669b0b01da578009152de10e06b6e3c4 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Thu, 27 Jun 2019 14:49:47 -0600 Subject: [PATCH 08/59] Use bufio.NewReader to read s3 GetObject response body --- x-pack/filebeat/input/s3/input.go | 51 +++++++++----------------- x-pack/filebeat/input/s3/input_test.go | 21 +++++++---- 2 files changed, 31 insertions(+), 41 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 13fdef2d0c5..5c00cdcacaa 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -5,8 +5,9 @@ package s3 import ( - "bytes" + "bufio" "encoding/json" + "io" "strings" "sync" "time" @@ -278,41 +279,23 @@ func (p *Input) readS3Object(svc s3iface.S3API, s3Infos []s3Info) ([]*beat.Event p.logger.Error(errors.Wrap(err, "s3 get object request 
failed")) } - // method1 - buf := new(bytes.Buffer) - _, err = buf.ReadFrom(resp.Body) - if err != nil { - p.logger.Error(errors.Wrap(err, "s3 get object request failed")) - } - - logString := buf.String() - - // method2 - //var logString string - //p := make([]byte, 4) - //for { - // n, err := resp.Body.Read(p) - // if err == io.EOF { - // break - // } - // logString += string(p[:n]) - //} - - // method3 - //outFile, err := os.Create("test") - //defer outFile.Close() - //_, err = io.Copy(outFile, resp.Body) - //fileBytes , err := ioutil.ReadFile("test") - //logString := string(fileBytes) - - logLines := strings.Split(logString, "\n") - for i, log := range logLines { - if log == "" { - continue + reader := bufio.NewReader(resp.Body) + line := 0 + for { + log, err := reader.ReadString('\n') + if err != nil { + if err == io.EOF { + line += 1 + event := createEvent(log, int64(line), s3Info) + events = append(events, event) + break + } else { + p.logger.Error(errors.Wrap(err, "ReadString failed")) + } } - // create event per log line - event := createEvent(log, int64(i), s3Info) + line += 1 + event := createEvent(log, int64(line), s3Info) events = append(events, event) } }(s3Infos[i]) diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index 4f2fc701d34..906be3d8182 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -24,10 +24,13 @@ type MockS3Client struct { s3iface.S3API } -var s3LogString = "36c1f test-s3-ks [20/Jun/2019:04:07:48 +0000] 97.118.27.161 arn:aws:iam::627959692251:user/kaiyan.sheng@elastic.co 5141F2225A070122 REST.HEAD.OBJECT Screen%2BShot%2B2019-02-21%2Bat%2B2.15.50%2BPM.png" +var ( + s3LogString1 = "36c1f test-s3-ks [20/Jun/2019] 1.2.3.4 arn:aws:iam::1234:user/kaiyan.sheng@elastic.co 5141F REST.HEAD.OBJECT Screen1.png \n" + s3LogString2 = "28kdg test-s3-ks [20/Jun/2019] 1.2.3.4 arn:aws:iam::1234:user/kaiyan.sheng@elastic.co 5A070 REST.HEAD.OBJECT Screen2.png" +) func (m *MockS3Client) GetObjectRequest(input *s3.GetObjectInput) s3.GetObjectRequest { - logBody := ioutil.NopCloser(bytes.NewReader([]byte(s3LogString))) + logBody := ioutil.NopCloser(bytes.NewReader([]byte(s3LogString1 + s3LogString2))) return s3.GetObjectRequest{ Request: &awssdk.Request{ Data: &s3.GetObjectOutput{ @@ -125,18 +128,18 @@ func TestReadS3Object(t *testing.T) { mockSvc := &MockS3Client{} s3Info := []s3Info{ { - name: "test-s3-ks-2", + name: "test-s3-ks", key: "log2019-06-21-16-16-54", region: "us-west-1", }, } events, err := p.readS3Object(mockSvc, s3Info) assert.NoError(t, err) - assert.Equal(t, 1, len(events)) + assert.Equal(t, 2, len(events)) bucketName, err := events[0].Fields.GetValue("aws.s3.bucket_name") assert.NoError(t, err) - assert.Equal(t, "test-s3-ks-2", bucketName.(string)) + assert.Equal(t, "test-s3-ks", bucketName.(string)) objectKey, err := events[0].Fields.GetValue("aws.s3.object_key") assert.NoError(t, err) @@ -150,9 +153,13 @@ func TestReadS3Object(t *testing.T) { assert.NoError(t, err) assert.Equal(t, "us-west-1", region) - message, err := events[0].Fields.GetValue("message") + message1, err := events[0].Fields.GetValue("message") + assert.NoError(t, err) + assert.Equal(t, s3LogString1, message1.(string)) + + message2, err := events[1].Fields.GetValue("message") assert.NoError(t, err) - assert.Equal(t, s3LogString, message.(string)) + assert.Equal(t, s3LogString2, message2.(string)) } func TestConstructObjectURL(t *testing.T) { From 0c692bd0841319f071a7b45ec6e2601e7b5ffbc9 Mon Sep 17 00:00:00 
2001 From: kaiyan-sheng Date: Thu, 27 Jun 2019 17:42:51 -0600 Subject: [PATCH 09/59] Update changelog --- CHANGELOG.next.asciidoc | 1 + x-pack/filebeat/input/s3/input.go | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index b22e0fcc2c8..47fd6ed0595 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -251,6 +251,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Update module pipelines to enrich events with autonomous system fields. {pull}13036[13036] - Add module for ingesting IBM MQ logs. {pull}8782[8782] - Add S3 input to retrieve logs from AWS S3 buckets. +- Add S3 input to retrieve logs from AWS S3 buckets. {pull}12640[12640] {issue}12582[12582] *Heartbeat* diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 5c00cdcacaa..51a9732322d 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -285,7 +285,7 @@ func (p *Input) readS3Object(svc s3iface.S3API, s3Infos []s3Info) ([]*beat.Event log, err := reader.ReadString('\n') if err != nil { if err == io.EOF { - line += 1 + line++ event := createEvent(log, int64(line), s3Info) events = append(events, event) break @@ -294,7 +294,7 @@ func (p *Input) readS3Object(svc s3iface.S3API, s3Infos []s3Info) ([]*beat.Event } } // create event per log line - line += 1 + line++ event := createEvent(log, int64(line), s3Info) events = append(events, event) } From 1fdf6fed3cf54b821c04a589da0a922f83df8afb Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Fri, 28 Jun 2019 11:36:18 -0600 Subject: [PATCH 10/59] Change to push individual event when it's ready --- filebeat/docs/fields.asciidoc | 56 ------------ x-pack/filebeat/input/s3/_meta/fields.yml | 30 ------- x-pack/filebeat/input/s3/config.go | 5 +- x-pack/filebeat/input/s3/fields.go | 2 +- x-pack/filebeat/input/s3/input.go | 104 ++++++++++++++-------- x-pack/filebeat/input/s3/input_test.go | 60 ++++++++++--- 6 files changed, 117 insertions(+), 140 deletions(-) diff --git a/filebeat/docs/fields.asciidoc b/filebeat/docs/fields.asciidoc index 8ab90897b05..8f738732eb0 100644 --- a/filebeat/docs/fields.asciidoc +++ b/filebeat/docs/fields.asciidoc @@ -18,7 +18,6 @@ grouped in the following categories: * <> * <> * <> -* <> * <> * <> * <> @@ -36,7 +35,6 @@ grouped in the following categories: * <> * <> * <> -* <> * <> * <> * <> @@ -1404,33 +1402,6 @@ alias to: cloud.region -- -[[exported-fields-cloud]] -== cloud fields - -Cloud fields from s3 input. - - - -*`provider`*:: -+ --- -Name of the cloud provider for S3. - - -type: keyword - --- - -*`region`*:: -+ --- -Region name of the S3 bucket. - - -type: keyword - --- - [[exported-fields-coredns]] == Coredns fields @@ -7877,33 +7848,6 @@ type: long Name of organization associated with the autonomous system. -type: keyword - --- - -[[exported-fields-log]] -== log fields - -Log fields from s3 input. - - - -*`offset`*:: -+ --- -The file offset the reported line starts at. - - -type: long - --- - -*`file.path`*:: -+ --- -The objectURL of the file from which the line was read. - - type: keyword -- diff --git a/x-pack/filebeat/input/s3/_meta/fields.yml b/x-pack/filebeat/input/s3/_meta/fields.yml index 8013f6b6073..30c6766fc9e 100644 --- a/x-pack/filebeat/input/s3/_meta/fields.yml +++ b/x-pack/filebeat/input/s3/_meta/fields.yml @@ -1,18 +1,3 @@ -- key: log - title: "log" - description: > - Log fields from s3 input. 
- release: beta - fields: - - name: offset - type: long - description: > - The file offset the reported line starts at. - - name: file.path - type: keyword - description: > - The objectURL of the file from which the line was read. - - key: aws title: "aws" description: > @@ -32,18 +17,3 @@ type: keyword description: > Name of the S3 object that this log retrieved from. - -- key: cloud - title: "cloud" - description: > - Cloud fields from s3 input. - release: beta - fields: - - name: provider - type: keyword - description: > - Name of the cloud provider for S3. - - name: region - type: keyword - description: > - Region name of the S3 bucket. diff --git a/x-pack/filebeat/input/s3/config.go b/x-pack/filebeat/input/s3/config.go index 612cb67ad04..e9abf699412 100644 --- a/x-pack/filebeat/input/s3/config.go +++ b/x-pack/filebeat/input/s3/config.go @@ -16,7 +16,8 @@ type config struct { harvester.ForwarderConfig `config:",inline"` QueueURLs []string `config:"queueURLs" validate:"nonzero,required"` BucketNames []string `config:"bucketNames"` - AccessKeyID string `config:"access_key_id" validate:"nonzero,required"` - SecretAccessKey string `config:"secret_access_key" validate:"nonzero,required"` + AccessKeyID string `config:"access_key_id"` + SecretAccessKey string `config:"secret_access_key"` SessionToken string `config:"session_token"` + SharedConfigProfile string `config:"shared_config_profile"` } diff --git a/x-pack/filebeat/input/s3/fields.go b/x-pack/filebeat/input/s3/fields.go index 9839b2a4172..ee9b3dccda4 100644 --- a/x-pack/filebeat/input/s3/fields.go +++ b/x-pack/filebeat/input/s3/fields.go @@ -19,5 +19,5 @@ func init() { // AssetS3 returns asset data. // This is the base64 encoded gzipped contents of input/s3. func AssetS3() string { - return "eJyskz9v3DAMxXd9iofs8eLNQ4Gia9Ah186BzqJs1bJpUHQMf/tCipP6kH8IrhoMSBT5I9+TbzHQ1iByZwANGqnBTeTuxgCOUith1sBTg28GAO64gw8UXYIXHpFqhGletDKAUCSbqMGZ1Brs95qSd4vJjtSAvU+k5QjQbaaMnrr94A1gXr96gg+R9mxoTxCaWZQcYpgISa1ogi19/KPlpGq22l8AB9pWFvc5k89/qNXf93dgX6CliTL32oe2L2eFv9oEIesqY3ZB7ZoOgto1vSPo9zVdJWiqL2brhJf548lOdS5ss3Zh8iyjzTeqPX5kHDnnpR1IH/LmJfa2oB+g8/ppR3rW81TvdaG9zZ+Q8kuEkEqgR3JFk+pVM0/GPAy0/ddensp+0suzwW3kxR0sLvt3TP6RY1fZPAs/Bkfy9Yd8nLH0+FILngWn+vKXEeoCT1/H3Je8UuSVu5X5GwAA//9i6E0j" + return "eJyskL1uwzAMhHc9xSF7snjTUKAv0KUPENDWOVH9I0Oka/jtCzluYaAtupSDAJKn+w48o+PqIYs6wKL19DjJoicHBGqT42QxjR5PDgCeF0Ub2QdFm9MArRDHabaLAzJ7itKjponDrvPbvzNGGeih1dYCtk70uOU0T/vkB1qp16oYizEgjm3KgxTFZd8fGUdOPTcd7Vqar90ntOO6pBwO81/QpV5kIFILu7NEefjC7lKeqOjTDZmWI98ZtptcvoVJ9Rsbu3Zc/zXLw/aPLB8BAAD//7PMioY=" } diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 51a9732322d..73f83a82bd3 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -12,6 +12,8 @@ import ( "sync" "time" + "github.com/aws/aws-sdk-go-v2/aws/external" + awssdk "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/aws/defaults" "github.com/aws/aws-sdk-go-v2/service/s3" @@ -103,26 +105,46 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input return p, nil } -// Run runs the input -func (p *Input) Run() { - p.logger.Debugf("s3", "Run s3 input with queueURLs: %+v", p.config.QueueURLs) +func (p *Input) getAWSCredentials() awssdk.Config { + // Check if accessKeyID and secretAccessKey is given from configuration + if p.config.AccessKeyID != "" && p.config.SecretAccessKey != "" { + awsConfig := defaults.Config() + awsCredentials := awssdk.Credentials{ + AccessKeyID: p.config.AccessKeyID, + SecretAccessKey: 
p.config.SecretAccessKey, + } + if p.config.SessionToken != "" { + awsCredentials.SessionToken = p.config.SessionToken + } - awsConfig := defaults.Config() - awsCredentials := awssdk.Credentials{ - AccessKeyID: p.config.AccessKeyID, - SecretAccessKey: p.config.SecretAccessKey, + awsConfig.Credentials = awssdk.StaticCredentialsProvider{ + Value: awsCredentials, + } + return awsConfig } - if p.config.SessionToken != "" { - awsCredentials.SessionToken = p.config.SessionToken + + // If accessKeyID and secretAccessKey is not given, then load from default config + var awsConfig awssdk.Config + var err error + if p.config.SharedConfigProfile != "" { + awsConfig, err = external.LoadDefaultAWSConfig( + external.WithSharedConfigProfile(p.config.SharedConfigProfile), + ) + } else { + awsConfig, err = external.LoadDefaultAWSConfig() } - awsConfig.Credentials = awssdk.StaticCredentialsProvider{ - Value: awsCredentials, + if err != nil { + p.logger.Error(errors.Wrap(err, "failed to load default config")) } + return awsConfig +} + +// Run runs the input +func (p *Input) Run() { + p.logger.Debugf("s3", "Run s3 input with queueURLs: %+v", p.config.QueueURLs) - forwarder := harvester.NewForwarder(p.outlet) for _, queueURL := range p.config.QueueURLs { - var d *util.Data regionName, err := getRegionFromQueueURL(queueURL) if err != nil { p.logger.Errorf("failed to get region name from queueURL: %s", queueURL) @@ -165,19 +187,11 @@ func (p *Input) Run() { } // read from s3 - events, err := p.readS3Object(svcS3, s3Infos) + p.readS3Object(svcS3, s3Infos) if err != nil { p.logger.Error(err.Error()) } - for _, event := range events { - d = &util.Data{Event: *event} - err = forwarder.Send(d) - if err != nil { - p.logger.Error(errors.Wrap(err, "forwarder send failed")) - } - } - // delete message after events are sent err = deleteMessage(queueURL, *m.ReceiptHandle, svcSQS) if err != nil { @@ -256,8 +270,7 @@ func stringInSlice(name string, bucketNames []string) bool { return false } -func (p *Input) readS3Object(svc s3iface.S3API, s3Infos []s3Info) ([]*beat.Event, error) { - var events []*beat.Event +func (p *Input) readS3Object(svc s3iface.S3API, s3Infos []s3Info) { if len(s3Infos) > 0 { var wg sync.WaitGroup numS3Infos := len(s3Infos) @@ -268,26 +281,19 @@ func (p *Input) readS3Object(svc s3iface.S3API, s3Infos []s3Info) ([]*beat.Event // launch goroutine to handle each message defer wg.Done() - s3GetObjectInput := &s3.GetObjectInput{ - Bucket: awssdk.String(s3Info.name), - Key: awssdk.String(s3Info.key), - } - req := svc.GetObjectRequest(s3GetObjectInput) - - resp, err := req.Send() + // read from s3 object + reader, err := bufferedIORead(svc, s3Info) if err != nil { p.logger.Error(errors.Wrap(err, "s3 get object request failed")) } - reader := bufio.NewReader(resp.Body) line := 0 for { log, err := reader.ReadString('\n') if err != nil { - if err == io.EOF { + if err == io.EOF && log != "" { line++ - event := createEvent(log, int64(line), s3Info) - events = append(events, event) + p.forwardEvent(createEvent(log, int64(line), s3Info)) break } else { p.logger.Error(errors.Wrap(err, "ReadString failed")) @@ -295,14 +301,36 @@ func (p *Input) readS3Object(svc s3iface.S3API, s3Infos []s3Info) ([]*beat.Event } // create event per log line line++ - event := createEvent(log, int64(line), s3Info) - events = append(events, event) + p.forwardEvent(createEvent(log, int64(line), s3Info)) } }(s3Infos[i]) wg.Wait() } } - return events, nil +} + +func bufferedIORead(svc s3iface.S3API, s3Info s3Info) (*bufio.Reader, error) { + 
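+	// A GetObject request is issued for the given bucket/key and the response
+	// body is wrapped in a bufio.Reader, so callers can consume the object
+	// line by line instead of loading it all into memory at once.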
s3GetObjectInput := &s3.GetObjectInput{ + Bucket: awssdk.String(s3Info.name), + Key: awssdk.String(s3Info.key), + } + req := svc.GetObjectRequest(s3GetObjectInput) + + resp, err := req.Send() + if err != nil { + return nil, errors.Wrap(err, "s3 get object request failed") + } + + return bufio.NewReader(resp.Body), nil +} + +func (p *Input) forwardEvent(event *beat.Event) { + forwarder := harvester.NewForwarder(p.outlet) + d := &util.Data{Event: *event} + err := forwarder.Send(d) + if err != nil { + p.logger.Error(errors.Wrap(err, "forwarder send failed")) + } } func deleteMessage(queueURL string, messagesReceiptHandle string, svcSQS *sqs.SQS) error { diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index 906be3d8182..43659a8372a 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -6,10 +6,11 @@ package s3 import ( "bytes" + "io" "io/ioutil" "testing" - "github.com/elastic/beats/libbeat/logp" + "github.com/elastic/beats/libbeat/beat" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/s3iface" @@ -26,7 +27,13 @@ type MockS3Client struct { var ( s3LogString1 = "36c1f test-s3-ks [20/Jun/2019] 1.2.3.4 arn:aws:iam::1234:user/kaiyan.sheng@elastic.co 5141F REST.HEAD.OBJECT Screen1.png \n" - s3LogString2 = "28kdg test-s3-ks [20/Jun/2019] 1.2.3.4 arn:aws:iam::1234:user/kaiyan.sheng@elastic.co 5A070 REST.HEAD.OBJECT Screen2.png" + s3LogString2 = "28kdg test-s3-ks [20/Jun/2019] 1.2.3.4 arn:aws:iam::1234:user/kaiyan.sheng@elastic.co 5A070 REST.HEAD.OBJECT Screen2.png \n" + mockSvc = &MockS3Client{} + info = s3Info{ + name: "test-s3-ks", + key: "log2019-06-21-16-16-54", + region: "us-west-1", + } ) func (m *MockS3Client) GetObjectRequest(input *s3.GetObjectInput) s3.GetObjectRequest { @@ -119,22 +126,49 @@ func TestHandleMessage(t *testing.T) { } } -func TestReadS3Object(t *testing.T) { - p := &Input{ - started: false, - logger: logp.NewLogger(inputName), +func TestBufferedIORead(t *testing.T) { + reader, err := bufferedIORead(mockSvc, info) + assert.NoError(t, err) + for i := 0; i < 3; i++ { + switch i { + case 0: + log, err := reader.ReadString('\n') + assert.NoError(t, err) + assert.Equal(t, s3LogString1, log) + case 1: + log, err := reader.ReadString('\n') + assert.NoError(t, err) + assert.Equal(t, s3LogString2, log) + case 2: + log, err := reader.ReadString('\n') + assert.Error(t, io.EOF, err) + assert.Equal(t, "", log) + } } +} +func TestCreateEvent(t *testing.T) { mockSvc := &MockS3Client{} - s3Info := []s3Info{ - { - name: "test-s3-ks", - key: "log2019-06-21-16-16-54", - region: "us-west-1", - }, + s3Info := s3Info{ + name: "test-s3-ks", + key: "log2019-06-21-16-16-54", + region: "us-west-1", } - events, err := p.readS3Object(mockSvc, s3Info) + + reader, err := bufferedIORead(mockSvc, s3Info) assert.NoError(t, err) + var events []*beat.Event + for { + line, err := reader.ReadString('\n') + if err == io.EOF { + event := createEvent(line, int64(0), s3Info) + events = append(events, event) + break + } + event := createEvent(line, int64(0), s3Info) + events = append(events, event) + } + assert.Equal(t, 2, len(events)) bucketName, err := events[0].Fields.GetValue("aws.s3.bucket_name") From 0f44668f37db60d59776c37e4535c45809b93b5a Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Fri, 28 Jun 2019 13:06:40 -0600 Subject: [PATCH 11/59] add getAWSCredentials function --- x-pack/filebeat/input/s3/input.go | 1 + 1 file changed, 1 insertion(+) diff --git 
a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 73f83a82bd3..66b6492fb5d 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -143,6 +143,7 @@ func (p *Input) getAWSCredentials() awssdk.Config { // Run runs the input func (p *Input) Run() { p.logger.Debugf("s3", "Run s3 input with queueURLs: %+v", p.config.QueueURLs) + awsConfig := p.getAWSCredentials() for _, queueURL := range p.config.QueueURLs { regionName, err := getRegionFromQueueURL(queueURL) From 605e51c70d2ff10eb5b3c2e0961b7f3e9ea2ecf2 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Fri, 28 Jun 2019 17:39:46 -0600 Subject: [PATCH 12/59] Add shared_config_profile option and read from ~/.aws/credentials --- x-pack/filebeat/input/s3/input.go | 71 +++++++++++++------------- x-pack/filebeat/input/s3/input_test.go | 9 ++-- 2 files changed, 41 insertions(+), 39 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 66b6492fb5d..df98d21c5f8 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -61,11 +61,10 @@ func init() { // Input is a input for s3 type Input struct { - started bool - outlet channel.Outleter - config config - cfg *common.Config - logger *logp.Logger + outlet channel.Outleter + config config + awsConfig awssdk.Config + logger *logp.Logger } type s3Info struct { @@ -91,60 +90,57 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input } if len(config.QueueURLs) == 0 { - return nil, errors.Wrap(err, "No sqs queueURLs configured") + return nil, errors.Wrap(err, "no sqs queueURLs configured") + } + + awsConfig, err := getAWSCredentials(config) + if err != nil { + return nil, errors.Wrap(err, "getAWSCredentials failed") } p := &Input{ - started: false, - outlet: outlet, - cfg: cfg, - config: config, - logger: logger, + //started: false, + outlet: outlet, + //cfg: cfg, + config: config, + awsConfig: awsConfig, + logger: logger, } return p, nil } -func (p *Input) getAWSCredentials() awssdk.Config { +func getAWSCredentials(config config) (awssdk.Config, error) { // Check if accessKeyID and secretAccessKey is given from configuration - if p.config.AccessKeyID != "" && p.config.SecretAccessKey != "" { + if config.AccessKeyID != "" && config.SecretAccessKey != "" { awsConfig := defaults.Config() awsCredentials := awssdk.Credentials{ - AccessKeyID: p.config.AccessKeyID, - SecretAccessKey: p.config.SecretAccessKey, + AccessKeyID: config.AccessKeyID, + SecretAccessKey: config.SecretAccessKey, } - if p.config.SessionToken != "" { - awsCredentials.SessionToken = p.config.SessionToken + if config.SessionToken != "" { + awsCredentials.SessionToken = config.SessionToken } awsConfig.Credentials = awssdk.StaticCredentialsProvider{ Value: awsCredentials, } - return awsConfig + return awsConfig, nil } // If accessKeyID and secretAccessKey is not given, then load from default config - var awsConfig awssdk.Config - var err error - if p.config.SharedConfigProfile != "" { - awsConfig, err = external.LoadDefaultAWSConfig( - external.WithSharedConfigProfile(p.config.SharedConfigProfile), + if config.SharedConfigProfile != "" { + return external.LoadDefaultAWSConfig( + external.WithSharedConfigProfile(config.SharedConfigProfile), ) } else { - awsConfig, err = external.LoadDefaultAWSConfig() + return external.LoadDefaultAWSConfig() } - - if err != nil { - p.logger.Error(errors.Wrap(err, "failed to load default config")) - } - return awsConfig } // Run runs the input func (p 
*Input) Run() { p.logger.Debugf("s3", "Run s3 input with queueURLs: %+v", p.config.QueueURLs) - awsConfig := p.getAWSCredentials() - for _, queueURL := range p.config.QueueURLs { regionName, err := getRegionFromQueueURL(queueURL) if err != nil { @@ -152,6 +148,7 @@ func (p *Input) Run() { continue } + awsConfig := p.awsConfig.Copy() awsConfig.Region = regionName svcSQS := sqs.New(awsConfig) svcS3 := s3.New(awsConfig) @@ -168,7 +165,8 @@ func (p *Input) Run() { req := svcSQS.ReceiveMessageRequest(receiveMessageInput) output, errR := req.Send() if errR != nil { - return + p.logger.Errorf("failed to receive message from SQS:", err) + continue } // process messages @@ -189,9 +187,6 @@ func (p *Input) Run() { // read from s3 p.readS3Object(svcS3, s3Infos) - if err != nil { - p.logger.Error(err.Error()) - } // delete message after events are sent err = deleteMessage(queueURL, *m.ReceiptHandle, svcSQS) @@ -291,8 +286,12 @@ func (p *Input) readS3Object(svc s3iface.S3API, s3Infos []s3Info) { line := 0 for { log, err := reader.ReadString('\n') + if log == "" { + break + } + if err != nil { - if err == io.EOF && log != "" { + if err == io.EOF { line++ p.forwardEvent(createEvent(log, int64(line), s3Info)) break diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index 43659a8372a..0dfa895e264 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -159,13 +159,16 @@ func TestCreateEvent(t *testing.T) { assert.NoError(t, err) var events []*beat.Event for { - line, err := reader.ReadString('\n') + log, err := reader.ReadString('\n') + if log == "" { + break + } if err == io.EOF { - event := createEvent(line, int64(0), s3Info) + event := createEvent(log, int64(0), s3Info) events = append(events, event) break } - event := createEvent(line, int64(0), s3Info) + event := createEvent(log, int64(0), s3Info) events = append(events, event) } From 9c4aaee1076eff37307b9ad55af63e60b55c4ced Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Sat, 29 Jun 2019 22:15:00 -0600 Subject: [PATCH 13/59] Update aws credentials with credential_profile_name --- x-pack/filebeat/_meta/common.reference.inputs.yml | 6 +++++- x-pack/filebeat/filebeat.reference.yml | 6 +++++- x-pack/filebeat/input/s3/config.go | 2 +- x-pack/filebeat/input/s3/input.go | 9 +++++---- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/x-pack/filebeat/_meta/common.reference.inputs.yml b/x-pack/filebeat/_meta/common.reference.inputs.yml index e72a828ad7c..3965e8801c1 100644 --- a/x-pack/filebeat/_meta/common.reference.inputs.yml +++ b/x-pack/filebeat/_meta/common.reference.inputs.yml @@ -59,12 +59,16 @@ #- type: s3 #enabled: false - # AWS Credentials (required) + # AWS Credentials + # If access_key_id and secret_access_key are configured, then use them to make api calls. + # If not, s3 input will load default AWS config or load with given profile name. 
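+  # A shared credentials file profile (~/.aws/credentials) referenced by
+  # credential_profile_name might look like this (hypothetical values):
+  #   [test-s3-input]
+  #   aws_access_key_id = EXAMPLE_KEY_ID
+  #   aws_secret_access_key = EXAMPLE_SECRET_ACCESS_KEY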
#access_key_id: '${AWS_ACCESS_KEY_ID:""}' #secret_access_key: '${AWS_SECRET_ACCESS_KEY:""}' #session_token: '${AWS_SESSION_TOKEN:"”}' #credential_profile_name: test-s3-input + #credential_profile_name: test-s3-input + # QueueURLs (required) to receive queue messages from #queueURLs: ["https://sqs.us-east-1.amazonaws.com/1234/test-s3-logs-queue"] diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml index d323192a853..29e77e91c1d 100644 --- a/x-pack/filebeat/filebeat.reference.yml +++ b/x-pack/filebeat/filebeat.reference.yml @@ -978,12 +978,16 @@ filebeat.inputs: #- type: s3 #enabled: false - # AWS Credentials (required) + # AWS Credentials + # If access_key_id and secret_access_key are configured, then use them to make api calls. + # If not, s3 input will load default AWS config or load with given profile name. #access_key_id: '${AWS_ACCESS_KEY_ID:""}' #secret_access_key: '${AWS_SECRET_ACCESS_KEY:""}' #session_token: '${AWS_SESSION_TOKEN:"”}' #credential_profile_name: test-s3-input + #credential_profile_name: test-s3-input + # QueueURLs (required) to receive queue messages from #queueURLs: ["https://sqs.us-east-1.amazonaws.com/1234/test-s3-logs-queue"] diff --git a/x-pack/filebeat/input/s3/config.go b/x-pack/filebeat/input/s3/config.go index e9abf699412..7a5e20e898f 100644 --- a/x-pack/filebeat/input/s3/config.go +++ b/x-pack/filebeat/input/s3/config.go @@ -19,5 +19,5 @@ type config struct { AccessKeyID string `config:"access_key_id"` SecretAccessKey string `config:"secret_access_key"` SessionToken string `config:"session_token"` - SharedConfigProfile string `config:"shared_config_profile"` + ProfileName string `config:"credential_profile_name"` } diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index df98d21c5f8..6d25b9146fe 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -129,13 +129,14 @@ func getAWSCredentials(config config) (awssdk.Config, error) { } // If accessKeyID and secretAccessKey is not given, then load from default config - if config.SharedConfigProfile != "" { + // Please see https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-profiles.html + // with more details. 
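+	// Resolution order: static credentials from the input configuration take
+	// precedence (handled above), then a named shared-config profile, and
+	// finally the SDK's default credential chain.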
+ if config.ProfileName != "" { return external.LoadDefaultAWSConfig( - external.WithSharedConfigProfile(config.SharedConfigProfile), + external.WithSharedConfigProfile(config.ProfileName), ) - } else { - return external.LoadDefaultAWSConfig() } + return external.LoadDefaultAWSConfig() } // Run runs the input From 87b2307c0e75b989bd2114ea0cb31766fb072a9d Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 1 Jul 2019 13:51:36 -0600 Subject: [PATCH 14/59] fix wg.wait and change log.offset to be byte size instead of line number --- x-pack/filebeat/input/s3/input.go | 28 ++++++++++++++------------ x-pack/filebeat/input/s3/input_test.go | 12 +++++------ 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 6d25b9146fe..e09253c49f1 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -177,25 +177,26 @@ func (p *Input) Run() { wg.Add(numMessages) for i := range output.Messages { - go func(m sqs.Message) { - // launch goroutine to handle each message + go func(message sqs.Message) { + // launch goroutine to handle each message from sqs defer wg.Done() - s3Infos, err := handleMessage(m, p.config.BucketNames) + s3Infos, err := handleMessage(message, p.config.BucketNames) if err != nil { p.logger.Error(err.Error()) } - // read from s3 + // read from s3 object and create event for each log line p.readS3Object(svcS3, s3Infos) // delete message after events are sent - err = deleteMessage(queueURL, *m.ReceiptHandle, svcSQS) + err = deleteMessage(queueURL, *message.ReceiptHandle, svcSQS) if err != nil { p.logger.Error(errors.Wrap(err, "deleteMessages failed")) } }(output.Messages[i]) } + wg.Wait() } } } @@ -284,7 +285,7 @@ func (p *Input) readS3Object(svc s3iface.S3API, s3Infos []s3Info) { p.logger.Error(errors.Wrap(err, "s3 get object request failed")) } - line := 0 + offset := 0 for { log, err := reader.ReadString('\n') if log == "" { @@ -293,20 +294,21 @@ func (p *Input) readS3Object(svc s3iface.S3API, s3Infos []s3Info) { if err != nil { if err == io.EOF { - line++ - p.forwardEvent(createEvent(log, int64(line), s3Info)) + offset += len([]byte(log)) + p.forwardEvent(createEvent(log, offset, s3Info)) break } else { p.logger.Error(errors.Wrap(err, "ReadString failed")) } } + // create event per log line - line++ - p.forwardEvent(createEvent(log, int64(line), s3Info)) + offset += len([]byte(log)) + p.forwardEvent(createEvent(log, offset, s3Info)) } }(s3Infos[i]) - wg.Wait() } + wg.Wait() } } @@ -348,11 +350,11 @@ func deleteMessage(queueURL string, messagesReceiptHandle string, svcSQS *sqs.SQ return nil } -func createEvent(log string, offset int64, s3Info s3Info) *beat.Event { +func createEvent(log string, offset int, s3Info s3Info) *beat.Event { f := common.MapStr{ "message": log, "log": common.MapStr{ - "offset": offset, + "offset": int64(offset), "file.path": constructObjectURL(s3Info), }, "aws": common.MapStr{ diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index 0dfa895e264..df33ae8936a 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -10,14 +10,13 @@ import ( "io/ioutil" "testing" - "github.com/elastic/beats/libbeat/beat" - + awssdk "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/s3iface" - - awssdk "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/sqs" "github.com/stretchr/testify/assert" + 
+ "github.com/elastic/beats/libbeat/beat" ) // MockS3Client struct is used for unit tests. @@ -164,11 +163,12 @@ func TestCreateEvent(t *testing.T) { break } if err == io.EOF { - event := createEvent(log, int64(0), s3Info) + event := createEvent(log, len([]byte(log)), s3Info) events = append(events, event) break } - event := createEvent(log, int64(0), s3Info) + + event := createEvent(log, len([]byte(log)), s3Info) events = append(events, event) } From 68f1bf7ffdbfa06127afde483bb6685f01a6d715 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 1 Jul 2019 19:56:00 -0600 Subject: [PATCH 15/59] ChangeMessageVisibility when process one message longer than 1/2 visibility timeout --- x-pack/filebeat/input/s3/input.go | 44 ++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index e09253c49f1..bdaacd40cba 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -12,10 +12,9 @@ import ( "sync" "time" - "github.com/aws/aws-sdk-go-v2/aws/external" - awssdk "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/aws/defaults" + "github.com/aws/aws-sdk-go-v2/aws/external" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/s3iface" "github.com/aws/aws-sdk-go-v2/service/sqs" @@ -49,7 +48,9 @@ var ( // retrieve requests after being retrieved by a ReceiveMessage request. // This value needs to be a lot bigger than filebeat collection frequency so // if it took too long to read the s3 log, this sqs message will not be reprocessed. - visibilityTimeout int64 = 300 + // The default visibility timeout for a message is 30 seconds. The minimum + // is 0 seconds. The maximum is 12 hours. + visibilityTimeout int64 = 30 ) func init() { @@ -99,9 +100,7 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input } p := &Input{ - //started: false, - outlet: outlet, - //cfg: cfg, + outlet: outlet, config: config, awsConfig: awsConfig, logger: logger, @@ -163,6 +162,12 @@ func (p *Input) Run() { WaitTimeSeconds: &waitTimeSecond, } + // update message visibility timeout to make sure filebeat can finish reading + changeMessageVisibilityInput := &sqs.ChangeMessageVisibilityInput{ + QueueUrl: &queueURL, + VisibilityTimeout: &visibilityTimeout, + } + req := svcSQS.ReceiveMessageRequest(receiveMessageInput) output, errR := req.Send() if errR != nil { @@ -177,8 +182,9 @@ func (p *Input) Run() { wg.Add(numMessages) for i := range output.Messages { + // launch goroutine to handle each message from sqs + c := make(chan struct{}) go func(message sqs.Message) { - // launch goroutine to handle each message from sqs defer wg.Done() s3Infos, err := handleMessage(message, p.config.BucketNames) @@ -195,6 +201,19 @@ func (p *Input) Run() { p.logger.Error(errors.Wrap(err, "deleteMessages failed")) } }(output.Messages[i]) + select { + case <-c: + close(c) + case <-time.After(time.Duration(visibilityTimeout/2) * time.Second): + // if half of the set visibilityTimeout passed and this is + // still ongoing, then change visibility timeout. 
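+					// Extending the visibility window keeps SQS from redelivering
+					// this in-flight message while it is still being processed.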
+					changeMessageVisibilityInput.ReceiptHandle = output.Messages[i].ReceiptHandle
+					req := svcSQS.ChangeMessageVisibilityRequest(changeMessageVisibilityInput)
+					_, err = req.Send()
+					if err != nil {
+						p.logger.Error(errors.Wrap(err, "change message visibility failed"))
+					}
+				}
 			}

From 0a16db3c30dbdbb21a8cd7882aa481e92aba57ce Mon Sep 17 00:00:00 2001
From: kaiyan-sheng
Date: Tue, 2 Jul 2019 14:05:46 -0600
Subject: [PATCH 16/59] Add changeMessageVisibility and check time spent on reading log file

---
 x-pack/filebeat/input/s3/input.go | 39 ++++++++++++++++---------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go
index bdaacd40cba..1ce0f375c84 100644
--- a/x-pack/filebeat/input/s3/input.go
+++ b/x-pack/filebeat/input/s3/input.go
@@ -50,7 +50,7 @@ var (
 	// if it took too long to read the s3 log, this sqs message will not be reprocessed.
 	// The default visibility timeout for a message is 30 seconds. The minimum
 	// is 0 seconds. The maximum is 12 hours.
- visibilityTimeout int64 = 30 + visibilityTimeout int64 = 300 ) func init() { @@ -153,39 +153,40 @@ func (p *Input) Run() { svcSQS := sqs.New(awsConfig) svcS3 := s3.New(awsConfig) - // receive messages - receiveMessageInput := &sqs.ReceiveMessageInput{ - QueueUrl: &queueURL, - MessageAttributeNames: []string{"All"}, - MaxNumberOfMessages: &maxNumberOfMessage, - VisibilityTimeout: &visibilityTimeout, - WaitTimeSeconds: &waitTimeSecond, - } - - // update message visibility timeout to make sure filebeat can finish reading + // update message visibility timeout if it's taking longer than 1/2 of + // visibilityTimeout to make sure filebeat can finish reading changeMessageVisibilityInput := &sqs.ChangeMessageVisibilityInput{ QueueUrl: &queueURL, VisibilityTimeout: &visibilityTimeout, } - req := svcSQS.ReceiveMessageRequest(receiveMessageInput) - output, errR := req.Send() - if errR != nil { - p.logger.Errorf("failed to receive message from SQS:", err) + // receive messages + req := svcSQS.ReceiveMessageRequest( + &sqs.ReceiveMessageInput{ + QueueUrl: &queueURL, + MessageAttributeNames: []string{"All"}, + MaxNumberOfMessages: &maxNumberOfMessage, + VisibilityTimeout: &visibilityTimeout, + WaitTimeSeconds: &waitTimeSecond, + }) + output, err := req.Send() + if err != nil { + p.logger.Error("failed to receive message from SQS:", err) continue } - // process messages + // process messages received from sqs if len(output.Messages) > 0 { var wg sync.WaitGroup numMessages := len(output.Messages) wg.Add(numMessages) for i := range output.Messages { + done := make(chan struct{}) // launch goroutine to handle each message from sqs - c := make(chan struct{}) go func(message sqs.Message) { defer wg.Done() + defer close(done) s3Infos, err := handleMessage(message, p.config.BucketNames) if err != nil { @@ -201,9 +202,9 @@ func (p *Input) Run() { p.logger.Error(errors.Wrap(err, "deleteMessages failed")) } }(output.Messages[i]) + select { - case <-c: - close(c) + case <-done: case <-time.After(time.Duration(visibilityTimeout/2) * time.Second): // if half of the set visibilityTimeout passed and this is // still ongoing, then change visibility timeout. 
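 					// (each successful ChangeMessageVisibility call restarts the
 					// message's visibility window from the full visibilityTimeout)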
From 6121b5701ac9ec070847c6896cf77d72eb33e00f Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Tue, 2 Jul 2019 14:23:47 -0600 Subject: [PATCH 17/59] rebase --- x-pack/filebeat/_meta/common.reference.inputs.yml | 2 -- x-pack/filebeat/filebeat.reference.yml | 2 -- x-pack/filebeat/input/s3/input.go | 2 +- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/x-pack/filebeat/_meta/common.reference.inputs.yml b/x-pack/filebeat/_meta/common.reference.inputs.yml index 3965e8801c1..13d4dd15995 100644 --- a/x-pack/filebeat/_meta/common.reference.inputs.yml +++ b/x-pack/filebeat/_meta/common.reference.inputs.yml @@ -67,8 +67,6 @@ #session_token: '${AWS_SESSION_TOKEN:"”}' #credential_profile_name: test-s3-input - #credential_profile_name: test-s3-input - # QueueURLs (required) to receive queue messages from #queueURLs: ["https://sqs.us-east-1.amazonaws.com/1234/test-s3-logs-queue"] diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml index 29e77e91c1d..d36bab01186 100644 --- a/x-pack/filebeat/filebeat.reference.yml +++ b/x-pack/filebeat/filebeat.reference.yml @@ -986,8 +986,6 @@ filebeat.inputs: #session_token: '${AWS_SESSION_TOKEN:"”}' #credential_profile_name: test-s3-input - #credential_profile_name: test-s3-input - # QueueURLs (required) to receive queue messages from #queueURLs: ["https://sqs.us-east-1.amazonaws.com/1234/test-s3-logs-queue"] diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 1ce0f375c84..662102c897b 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -168,7 +168,7 @@ func (p *Input) Run() { MaxNumberOfMessages: &maxNumberOfMessage, VisibilityTimeout: &visibilityTimeout, WaitTimeSeconds: &waitTimeSecond, - }) + }) output, err := req.Send() if err != nil { p.logger.Error("failed to receive message from SQS:", err) From aa70bd8b48703d821fbee361cb7c2ec436b33701 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Wed, 3 Jul 2019 15:26:39 -0600 Subject: [PATCH 18/59] Move select into a separate go routine --- .../_meta/common.reference.inputs.yml | 10 +-- .../docs/inputs/input-aws-s3.asciidoc | 75 +++++++++++++++++++ x-pack/filebeat/filebeat.reference.yml | 10 +-- x-pack/filebeat/input/s3/config.go | 4 +- x-pack/filebeat/input/s3/input.go | 26 ++++--- 5 files changed, 102 insertions(+), 23 deletions(-) create mode 100644 x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc diff --git a/x-pack/filebeat/_meta/common.reference.inputs.yml b/x-pack/filebeat/_meta/common.reference.inputs.yml index 13d4dd15995..da89243a3e4 100644 --- a/x-pack/filebeat/_meta/common.reference.inputs.yml +++ b/x-pack/filebeat/_meta/common.reference.inputs.yml @@ -55,7 +55,7 @@ credentials_file: ${path.config}/my-pubsub-subscriber-credentials.json #------------------------------ S3 input -------------------------------- -# Experimental: Config options for AWS S3 input +# Beta: Config options for AWS S3 input #- type: s3 #enabled: false @@ -67,8 +67,8 @@ #session_token: '${AWS_SESSION_TOKEN:"”}' #credential_profile_name: test-s3-input - # QueueURLs (required) to receive queue messages from - #queueURLs: ["https://sqs.us-east-1.amazonaws.com/1234/test-s3-logs-queue"] + # Queue urls (required) to receive queue messages from + #queue_urls: ["https://sqs.us-east-1.amazonaws.com/1234/test-s3-logs-queue"] - # S3buckets to collect logs from - # bucketNames: ["bucket-test-1", "bucket-test-2"] + # S3 buckets to collect logs from + #bucket_names: ["bucket-test-1", "bucket-test-2"] diff --git 
a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc
new file mode 100644
index 00000000000..abd8d28b9dd
--- /dev/null
+++ b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc
@@ -0,0 +1,75 @@
+[role="xpack"]
+
+:type: s3
+
+[id="{beatname_lc}-input-{type}"]
+=== s3 input
+
+++++
+s3
+++++
+
+beta[]
+
+Use the `s3` input to retrieve logs from S3 objects that are pointed to by
+messages from specific SQS queues.
+
+This input can, for example, be used to receive S3 server access logs to monitor
+detailed records of the requests that are made to a bucket.
+
+Example configuration 1:
+
+["source","yaml",subs="attributes"]
+----
+{beatname_lc}.inputs:
+- type: s3
+  queue_urls: ["https://sqs.ap-southeast-1.amazonaws.com/1234/test-s3-queue"]
+  bucket_names: ["test-s3-bucket"]
+  access_key_id: '${AWS_ACCESS_KEY_ID:""}'
+  secret_access_key: '${AWS_SECRET_ACCESS_KEY:""}'
+  session_token: '${AWS_SESSION_TOKEN:"”}'
+----
+
+Example configuration 2:
+
+["source","yaml",subs="attributes"]
+----
+{beatname_lc}.inputs:
+- type: s3
+  queue_urls: ["https://sqs.ap-southeast-1.amazonaws.com/1234/test-s3-queue"]
+  bucket_names: ["test-s3-bucket"]
+  credential_profile_name: test-s3-input
+----
+
+The `s3` input supports the following configuration options plus the
+<<{beatname_lc}-input-{type}-common-options>> described later.
+
+[float]
+==== `queue_urls`
+
+URLs of the AWS SQS queues that messages will be received from. Required.
+
+[float]
+==== `bucket_names`
+
+Name of the S3 buckets to read log files from. If it's not specified, then read
+from all S3 buckets that are pointed by SQS messages.
+
+[float]
+==== `aws credentials`
+
+In order to make AWS API calls, the `s3` input requires AWS credentials. Users can
+either put the values into the configuration for `access_key_id`,
+`secret_access_key` and/or `session_token`, or use the environment variables
+`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and/or `AWS_SESSION_TOKEN` instead.
+See `Example configuration 1` above.
+
+Alternatively, a shared AWS credentials file can be used by setting the optional
+`credential_profile_name` option. See `Example configuration 2` above. If
+`credential_profile_name` is not specified, then the `s3` input will consume
+credentials from the shared AWS credentials file using the `default` profile.
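+
+For reference, the queue messages this input expects are standard S3
+`ObjectCreated:Put` event notifications. A minimal, hypothetical message body
+looks like:
+
+["source","json"]
+----
+{
+  "Records": [
+    {
+      "eventSource": "aws:s3",
+      "awsRegion": "ap-southeast-1",
+      "eventName": "ObjectCreated:Put",
+      "s3": {
+        "bucket": {"name": "test-s3-bucket"},
+        "object": {"key": "server-access-log-2019-06-21"}
+      }
+    }
+  ]
+}
+----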
+ +[id="{beatname_lc}-input-{type}-common-options"] +include::../../../../filebeat/docs/inputs/input-common-options.asciidoc[] + +:type!: diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml index d36bab01186..e805e7c98dc 100644 --- a/x-pack/filebeat/filebeat.reference.yml +++ b/x-pack/filebeat/filebeat.reference.yml @@ -974,7 +974,7 @@ filebeat.inputs: credentials_file: ${path.config}/my-pubsub-subscriber-credentials.json #------------------------------ S3 input -------------------------------- -# Experimental: Config options for AWS S3 input +# Beta: Config options for AWS S3 input #- type: s3 #enabled: false @@ -986,11 +986,11 @@ filebeat.inputs: #session_token: '${AWS_SESSION_TOKEN:"”}' #credential_profile_name: test-s3-input - # QueueURLs (required) to receive queue messages from - #queueURLs: ["https://sqs.us-east-1.amazonaws.com/1234/test-s3-logs-queue"] + # Queue urls (required) to receive queue messages from + #queue_urls: ["https://sqs.us-east-1.amazonaws.com/1234/test-s3-logs-queue"] - # S3buckets to collect logs from - # bucketNames: ["bucket-test-1", "bucket-test-2"] + # S3 buckets to collect logs from + #bucket_names: ["bucket-test-1", "bucket-test-2"] #========================== Filebeat autodiscover ============================== # Autodiscover allows you to detect changes in the system and spawn new modules diff --git a/x-pack/filebeat/input/s3/config.go b/x-pack/filebeat/input/s3/config.go index 7a5e20e898f..96d072ff90e 100644 --- a/x-pack/filebeat/input/s3/config.go +++ b/x-pack/filebeat/input/s3/config.go @@ -14,8 +14,8 @@ var defaultConfig = config{ type config struct { harvester.ForwarderConfig `config:",inline"` - QueueURLs []string `config:"queueURLs" validate:"nonzero,required"` - BucketNames []string `config:"bucketNames"` + QueueURLs []string `config:"queue_urls" validate:"nonzero,required"` + BucketNames []string `config:"bucket_names"` AccessKeyID string `config:"access_key_id"` SecretAccessKey string `config:"secret_access_key"` SessionToken string `config:"session_token"` diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 662102c897b..3bf41c1f8a7 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -203,18 +203,22 @@ func (p *Input) Run() { } }(output.Messages[i]) - select { - case <-done: - case <-time.After(time.Duration(visibilityTimeout/2) * time.Second): - // if half of the set visibilityTimeout passed and this is - // still ongoing, then change visibility timeout. - changeMessageVisibilityInput.ReceiptHandle = output.Messages[i].ReceiptHandle - req := svcSQS.ChangeMessageVisibilityRequest(changeMessageVisibilityInput) - _, err = req.Send() - if err != nil { - p.logger.Error(errors.Wrap(err, "change message visibility failed")) + go func(message sqs.Message) { + for { + select { + case <-done: + case <-time.After(time.Duration(visibilityTimeout/2) * time.Second): + // if half of the set visibilityTimeout passed and this is + // still ongoing, then change visibility timeout. 
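+						// Point the request at this message's receipt handle before
+						// extending its visibility window.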
+ changeMessageVisibilityInput.ReceiptHandle = message.ReceiptHandle + req := svcSQS.ChangeMessageVisibilityRequest(changeMessageVisibilityInput) + _, err = req.Send() + if err != nil { + p.logger.Error(errors.Wrap(err, "change message visibility failed")) + } + } } - } + }(output.Messages[i]) } wg.Wait() } From cbf7f60b578300bc627115f552295d7ab5879312 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Sun, 7 Jul 2019 16:48:57 -0600 Subject: [PATCH 19/59] Remove bucket_names from config and add missing return --- x-pack/filebeat/_meta/common.reference.inputs.yml | 3 --- x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc | 8 -------- x-pack/filebeat/filebeat.reference.yml | 3 --- x-pack/filebeat/input/s3/config.go | 1 - x-pack/filebeat/input/s3/input.go | 9 +++------ 5 files changed, 3 insertions(+), 21 deletions(-) diff --git a/x-pack/filebeat/_meta/common.reference.inputs.yml b/x-pack/filebeat/_meta/common.reference.inputs.yml index da89243a3e4..d664923459e 100644 --- a/x-pack/filebeat/_meta/common.reference.inputs.yml +++ b/x-pack/filebeat/_meta/common.reference.inputs.yml @@ -69,6 +69,3 @@ # Queue urls (required) to receive queue messages from #queue_urls: ["https://sqs.us-east-1.amazonaws.com/1234/test-s3-logs-queue"] - - # S3 buckets to collect logs from - #bucket_names: ["bucket-test-1", "bucket-test-2"] diff --git a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc index abd8d28b9dd..8cfab3d1c01 100644 --- a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc @@ -24,7 +24,6 @@ Example configuration 1: {beatname_lc}.inputs: - type: s3 queue_urls: ["https://sqs.ap-southeast-1.amazonaws.com/1234/test-s3-queue"] - bucket_names: ["test-s3-bucket"] access_key_id: '${AWS_ACCESS_KEY_ID:""}' secret_access_key: '${AWS_SECRET_ACCESS_KEY:""}' session_token: '${AWS_SESSION_TOKEN:"”}' @@ -37,7 +36,6 @@ Example configuration 2: {beatname_lc}.inputs: - type: s3 queue_urls: ["https://sqs.ap-southeast-1.amazonaws.com/1234/test-s3-queue"] - bucket_names: ["test-s3-bucket"] credential_profile_name: test-s3-input ---- @@ -49,12 +47,6 @@ The `s3` input supports the following configuration options plus the URLs of the AWS SQS queues that messages will be received from. Required. -[float] -==== `bucket_names` - -Name of the S3 buckets to read log files from. If it's not specified, then read -from all S3 buckets that are pointed by SQS messages. 
- [float] ==== `aws credentials` diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml index e805e7c98dc..543795cc21f 100644 --- a/x-pack/filebeat/filebeat.reference.yml +++ b/x-pack/filebeat/filebeat.reference.yml @@ -988,9 +988,6 @@ filebeat.inputs: # Queue urls (required) to receive queue messages from #queue_urls: ["https://sqs.us-east-1.amazonaws.com/1234/test-s3-logs-queue"] - - # S3 buckets to collect logs from - #bucket_names: ["bucket-test-1", "bucket-test-2"] #========================== Filebeat autodiscover ============================== # Autodiscover allows you to detect changes in the system and spawn new modules diff --git a/x-pack/filebeat/input/s3/config.go b/x-pack/filebeat/input/s3/config.go index 96d072ff90e..f1dbc64993d 100644 --- a/x-pack/filebeat/input/s3/config.go +++ b/x-pack/filebeat/input/s3/config.go @@ -15,7 +15,6 @@ var defaultConfig = config{ type config struct { harvester.ForwarderConfig `config:",inline"` QueueURLs []string `config:"queue_urls" validate:"nonzero,required"` - BucketNames []string `config:"bucket_names"` AccessKeyID string `config:"access_key_id"` SecretAccessKey string `config:"secret_access_key"` SessionToken string `config:"session_token"` diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 3bf41c1f8a7..988cee84346 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -188,7 +188,7 @@ func (p *Input) Run() { defer wg.Done() defer close(done) - s3Infos, err := handleMessage(message, p.config.BucketNames) + s3Infos, err := handleMessage(message) if err != nil { p.logger.Error(err.Error()) } @@ -207,6 +207,7 @@ func (p *Input) Run() { for { select { case <-done: + return case <-time.After(time.Duration(visibilityTimeout/2) * time.Second): // if half of the set visibilityTimeout passed and this is // still ongoing, then change visibility timeout. 
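
For orientation: the hunk above adds the missing `return` to the `case <-done:` branch of the visibility keep-alive, a loop that the rest of this series keeps refining. Its effect is easiest to see in isolation; a hedged sketch under that reading, with `extend` standing in for the SDK's ChangeMessageVisibility request that the real code issues:

package main

import (
	"log"
	"time"
)

// keepAlive re-hides an in-flight SQS message for another full
// visibilityTimeout whenever half of the timeout elapses, and stops as soon
// as done is closed. extend is a stand-in for the input's real
// ChangeMessageVisibility call.
func keepAlive(done <-chan struct{}, visibilityTimeout int64, extend func() error) {
	for {
		select {
		case <-done:
			return
		case <-time.After(time.Duration(visibilityTimeout/2) * time.Second):
			if err := extend(); err != nil {
				// If every refresh fails, the message simply becomes
				// visible again in the queue and is redelivered.
				log.Printf("change message visibility failed: %v", err)
			}
		}
	}
}

func main() {
	done := make(chan struct{})
	go keepAlive(done, 300, func() error { return nil })
	time.Sleep(time.Second) // simulate a short-running handler
	close(done)
}
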
@@ -246,7 +247,7 @@ func getRegionFromQueueURL(queueURL string) (string, error) { } // handle message -func handleMessage(m sqs.Message, bucketNames []string) ([]s3Info, error) { +func handleMessage(m sqs.Message) ([]s3Info, error) { msg := map[string]interface{}{} err := json.Unmarshal([]byte(*m.Body), &msg) if err != nil { @@ -259,10 +260,6 @@ func handleMessage(m sqs.Message, bucketNames []string) ([]s3Info, error) { recordMap := record.(map[string]interface{}) if recordMap["eventSource"] == "aws:s3" && recordMap["eventName"] == "ObjectCreated:Put" { s3Info := s3Info{} - if !stringInSlice(recordMap["awsRegion"].(string), bucketNames) { - continue - } - s3Info.region = recordMap["awsRegion"].(string) s3Record := recordMap["s3"].(map[string]interface{}) From 561e6bc84def0b548758e156edeaa07404e4741b Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 8 Jul 2019 07:02:11 -0600 Subject: [PATCH 20/59] Fix unit tests --- x-pack/filebeat/input/s3/input_test.go | 27 +------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index df33ae8936a..f366c009223 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -57,7 +57,6 @@ func TestHandleMessage(t *testing.T) { cases := []struct { title string message sqs.Message - bucketNames []string expectedS3Infos []s3Info }{ { @@ -65,7 +64,6 @@ func TestHandleMessage(t *testing.T) { sqs.Message{ Body: awssdk.String("{\"Records\":[{\"eventSource\":\"aws:s3\",\"awsRegion\":\"ap-southeast-1\",\"eventTime\":\"2019-06-21T16:16:54.629Z\",\"eventName\":\"ObjectCreated:Put\",\"s3\":{\"configurationId\":\"object-created-event\",\"bucket\":{\"name\":\"test-s3-ks-2\",\"arn\":\"arn:aws:s3:::test-s3-ks-2\"},\"object\":{\"key\":\"server-access-logging2019-06-21-16-16-54-E68E4316CEB285AA\"}}}]}"), }, - []string{}, []s3Info{ { name: "test-s3-ks-2", @@ -78,7 +76,6 @@ func TestHandleMessage(t *testing.T) { sqs.Message{ Body: awssdk.String("{\"Records\":[{\"eventSource\":\"aws:s3\",\"awsRegion\":\"ap-southeast-1\",\"eventTime\":\"2019-06-21T16:16:54.629Z\",\"eventName\":\"ObjectCreated:Delete\",\"s3\":{\"configurationId\":\"object-created-event\",\"bucket\":{\"name\":\"test-s3-ks-2\",\"arn\":\"arn:aws:s3:::test-s3-ks-2\"},\"object\":{\"key\":\"server-access-logging2019-06-21-16-16-54-E68E4316CEB285AA\"}}}]}"), }, - []string{}, []s3Info{}, }, { @@ -86,35 +83,13 @@ func TestHandleMessage(t *testing.T) { sqs.Message{ Body: awssdk.String("{\"Records\":[{\"eventSource\":\"aws:ec2\",\"awsRegion\":\"ap-southeast-1\",\"eventTime\":\"2019-06-21T16:16:54.629Z\",\"eventName\":\"ObjectCreated:Put\",\"s3\":{\"configurationId\":\"object-created-event\",\"bucket\":{\"name\":\"test-s3-ks-2\",\"arn\":\"arn:aws:s3:::test-s3-ks-2\"},\"object\":{\"key\":\"server-access-logging2019-06-21-16-16-54-E68E4316CEB285AA\"}}}]}"), }, - []string{}, - []s3Info{}, - }, - { - "sqs message with right bucketNames", - sqs.Message{ - Body: awssdk.String("{\"Records\":[{\"eventSource\":\"aws:s3\",\"awsRegion\":\"ap-southeast-1\",\"eventTime\":\"2019-06-21T16:16:54.629Z\",\"eventName\":\"ObjectCreated:Put\",\"s3\":{\"configurationId\":\"object-created-event\",\"bucket\":{\"name\":\"test-s3-ks-2\",\"arn\":\"arn:aws:s3:::test-s3-ks-2\"},\"object\":{\"key\":\"server-access-logging2019-06-21-16-16-54-E68E4316CEB285AA\"}}}]}"), - }, - []string{"ap-southeast-1"}, - []s3Info{ - { - name: "test-s3-ks-2", - key: 
"server-access-logging2019-06-21-16-16-54-E68E4316CEB285AA", - }, - }, - }, - { - "sqs message with wrong bucketNames", - sqs.Message{ - Body: awssdk.String("{\"Records\":[{\"eventSource\":\"aws:s3\",\"awsRegion\":\"ap-southeast-1\",\"eventTime\":\"2019-06-21T16:16:54.629Z\",\"eventName\":\"ObjectCreated:Put\",\"s3\":{\"configurationId\":\"object-created-event\",\"bucket\":{\"name\":\"test-s3-ks-2\",\"arn\":\"arn:aws:s3:::test-s3-ks-2\"},\"object\":{\"key\":\"server-access-logging2019-06-21-16-16-54-E68E4316CEB285AA\"}}}]}"), - }, - []string{"us-west-1"}, []s3Info{}, }, } for _, c := range cases { t.Run(c.title, func(t *testing.T) { - s3Info, err := handleMessage(c.message, c.bucketNames) + s3Info, err := handleMessage(c.message) assert.NoError(t, err) assert.Equal(t, len(c.expectedS3Infos), len(s3Info)) if len(s3Info) > 0 { From 5d52a843833821c5f5b6cadfc92e41045cbb6569 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 8 Jul 2019 11:47:33 -0600 Subject: [PATCH 21/59] Add VisibilityTimeout into config with default to be 5min --- .../filebeat/_meta/common.reference.inputs.yml | 4 ++++ .../filebeat/docs/inputs/input-aws-s3.asciidoc | 10 ++++++++++ x-pack/filebeat/filebeat.reference.yml | 4 ++++ x-pack/filebeat/input/s3/config.go | 3 +++ x-pack/filebeat/input/s3/input.go | 18 ++++-------------- 5 files changed, 25 insertions(+), 14 deletions(-) diff --git a/x-pack/filebeat/_meta/common.reference.inputs.yml b/x-pack/filebeat/_meta/common.reference.inputs.yml index d664923459e..16964b2c84e 100644 --- a/x-pack/filebeat/_meta/common.reference.inputs.yml +++ b/x-pack/filebeat/_meta/common.reference.inputs.yml @@ -69,3 +69,7 @@ # Queue urls (required) to receive queue messages from #queue_urls: ["https://sqs.us-east-1.amazonaws.com/1234/test-s3-logs-queue"] + + # The duration (in seconds) that the received messages are hidden from subsequent + # retrieve requests after being retrieved by a ReceiveMessage request. + #visibility_timeout: 300 diff --git a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc index 8cfab3d1c01..8238b25a60d 100644 --- a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc @@ -47,6 +47,16 @@ The `s3` input supports the following configuration options plus the URLs of the AWS SQS queues that messages will be received from. Required. +[float] +==== `visibility_timeout` + +The duration (in seconds) that the received messages are hidden from subsequent +retrieve requests after being retrieved by a ReceiveMessage request. +This value needs to be a lot bigger than filebeat collection frequency so +if it took too long to read the s3 log, this sqs message will not be reprocessed. +The default visibility timeout for a message is 30 seconds. The minimum +is 0 seconds. The maximum is 12 hours. Default: 300 seconds. + [float] ==== `aws credentials` diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml index 543795cc21f..f8eca3b5bcb 100644 --- a/x-pack/filebeat/filebeat.reference.yml +++ b/x-pack/filebeat/filebeat.reference.yml @@ -988,6 +988,10 @@ filebeat.inputs: # Queue urls (required) to receive queue messages from #queue_urls: ["https://sqs.us-east-1.amazonaws.com/1234/test-s3-logs-queue"] + + # The duration (in seconds) that the received messages are hidden from subsequent + # retrieve requests after being retrieved by a ReceiveMessage request. 
+ #visibility_timeout: 300 #========================== Filebeat autodiscover ============================== # Autodiscover allows you to detect changes in the system and spawn new modules diff --git a/x-pack/filebeat/input/s3/config.go b/x-pack/filebeat/input/s3/config.go index f1dbc64993d..74c3680b871 100644 --- a/x-pack/filebeat/input/s3/config.go +++ b/x-pack/filebeat/input/s3/config.go @@ -10,6 +10,8 @@ var defaultConfig = config{ ForwarderConfig: harvester.ForwarderConfig{ Type: "s3", }, + ProfileName: "default", + VisibilityTimeout: 300, } type config struct { @@ -19,4 +21,5 @@ type config struct { SecretAccessKey string `config:"secret_access_key"` SessionToken string `config:"session_token"` ProfileName string `config:"credential_profile_name"` + VisibilityTimeout int `config:"visibility_timeout"` } diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 988cee84346..ab5bc4be88f 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -43,14 +43,6 @@ var ( // sooner than WaitTimeSeconds. If no messages are available and the wait time // expires, the call returns successfully with an empty list of messages. waitTimeSecond int64 = 10 - - // The duration (in seconds) that the received messages are hidden from subsequent - // retrieve requests after being retrieved by a ReceiveMessage request. - // This value needs to be a lot bigger than filebeat collection frequency so - // if it took too long to read the s3 log, this sqs message will not be reprocessed. - // The default visibility timeout for a message is 30 seconds. The minimum - // is 0 seconds. The maximum is 12 hours. - visibilityTimeout int64 = 300 ) func init() { @@ -130,17 +122,15 @@ func getAWSCredentials(config config) (awssdk.Config, error) { // If accessKeyID and secretAccessKey is not given, then load from default config // Please see https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-profiles.html // with more details. 
- if config.ProfileName != "" { - return external.LoadDefaultAWSConfig( - external.WithSharedConfigProfile(config.ProfileName), - ) - } - return external.LoadDefaultAWSConfig() + return external.LoadDefaultAWSConfig( + external.WithSharedConfigProfile(config.ProfileName), + ) } // Run runs the input func (p *Input) Run() { p.logger.Debugf("s3", "Run s3 input with queueURLs: %+v", p.config.QueueURLs) + visibilityTimeout := int64(p.config.VisibilityTimeout) for _, queueURL := range p.config.QueueURLs { regionName, err := getRegionFromQueueURL(queueURL) if err != nil { From 339b2a5648fd27799689305176aa04f430d751cf Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Tue, 9 Jul 2019 11:23:31 -0600 Subject: [PATCH 22/59] Add channel to close in Stop() --- x-pack/filebeat/input/s3/input.go | 130 +++++++++++++++++------------- 1 file changed, 72 insertions(+), 58 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index ab5bc4be88f..9d17f9db78d 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -58,6 +58,7 @@ type Input struct { config config awsConfig awssdk.Config logger *logp.Logger + close chan struct{} } type s3Info struct { @@ -96,6 +97,7 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input config: config, awsConfig: awsConfig, logger: logger, + close: make(chan struct{}), } return p, nil @@ -143,14 +145,7 @@ func (p *Input) Run() { svcSQS := sqs.New(awsConfig) svcS3 := s3.New(awsConfig) - // update message visibility timeout if it's taking longer than 1/2 of - // visibilityTimeout to make sure filebeat can finish reading - changeMessageVisibilityInput := &sqs.ChangeMessageVisibilityInput{ - QueueUrl: &queueURL, - VisibilityTimeout: &visibilityTimeout, - } - - // receive messages + // receive messages from sqs req := svcSQS.ReceiveMessageRequest( &sqs.ReceiveMessageInput{ QueueUrl: &queueURL, @@ -165,60 +160,21 @@ func (p *Input) Run() { continue } - // process messages received from sqs - if len(output.Messages) > 0 { - var wg sync.WaitGroup - numMessages := len(output.Messages) - wg.Add(numMessages) - - for i := range output.Messages { - done := make(chan struct{}) - // launch goroutine to handle each message from sqs - go func(message sqs.Message) { - defer wg.Done() - defer close(done) - - s3Infos, err := handleMessage(message) - if err != nil { - p.logger.Error(err.Error()) - } - - // read from s3 object and create event for each log line - p.readS3Object(svcS3, s3Infos) - - // delete message after events are sent - err = deleteMessage(queueURL, *message.ReceiptHandle, svcSQS) - if err != nil { - p.logger.Error(errors.Wrap(err, "deleteMessages failed")) - } - }(output.Messages[i]) - - go func(message sqs.Message) { - for { - select { - case <-done: - return - case <-time.After(time.Duration(visibilityTimeout/2) * time.Second): - // if half of the set visibilityTimeout passed and this is - // still ongoing, then change visibility timeout. 
- changeMessageVisibilityInput.ReceiptHandle = message.ReceiptHandle - req := svcSQS.ChangeMessageVisibilityRequest(changeMessageVisibilityInput) - _, err = req.Send() - if err != nil { - p.logger.Error(errors.Wrap(err, "change message visibility failed")) - } - } - } - }(output.Messages[i]) - } - wg.Wait() + if len(output.Messages) == 0 { + p.logger.Debug("no message received from SQS:", queueURL) + continue } + + // process messages received from sqs, get logs from s3 and create events + p.processor(queueURL, output.Messages, visibilityTimeout, svcS3, svcSQS) } } -// Stop stops the input and all its harvesters +// Stop stops the s3 input func (p *Input) Stop() { - p.outlet.Close() + close(p.close) + defer p.outlet.Close() + p.logger.Info("Stopping s3 input") } // Wait stops the s3 input. @@ -226,6 +182,64 @@ func (p *Input) Wait() { p.Stop() } +func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTimeout int64, svcS3 *s3.S3, svcSQS *sqs.SQS) { + var wg sync.WaitGroup + numMessages := len(messages) + wg.Add(numMessages) + + // update message visibility timeout if it's taking longer than 1/2 of + // visibilityTimeout to make sure filebeat can finish reading + changeMessageVisibilityInput := &sqs.ChangeMessageVisibilityInput{ + QueueUrl: &queueURL, + VisibilityTimeout: &visibilityTimeout, + } + + // process messages received from sqs + for i := range messages { + done := make(chan struct{}) + // launch goroutine to handle each message from sqs + go func(message sqs.Message) { + defer wg.Done() + defer close(done) + + s3Infos, err := handleMessage(message) + if err != nil { + p.logger.Error(err.Error()) + } + + // read from s3 object and create event for each log line + p.readS3CreateEvents(svcS3, s3Infos) + + // delete message after events are sent + err = deleteMessage(queueURL, *message.ReceiptHandle, svcSQS) + if err != nil { + p.logger.Error(errors.Wrap(err, "deleteMessages failed")) + } + }(messages[i]) + + go func(message sqs.Message) { + for { + select { + case <-p.close: + return + case <-done: + return + case <-time.After(time.Duration(visibilityTimeout/2) * time.Second): + // if half of the set visibilityTimeout passed and this is + // still ongoing, then change visibility timeout. 
+					changeMessageVisibilityInput.ReceiptHandle = message.ReceiptHandle
+					req := svcSQS.ChangeMessageVisibilityRequest(changeMessageVisibilityInput)
+					_, err := req.Send()
+					if err != nil {
+						p.logger.Error(errors.Wrap(err, "change message visibility failed"))
+					}
+				}
+			}
+		}(messages[i])
+	}
+	wg.Wait()
+}
+
 func getRegionFromQueueURL(queueURL string) (string, error) {
 	// get region from queueURL
 	// Example: https://sqs.us-east-1.amazonaws.com/627959692251/test-s3-logs
@@ -279,7 +293,7 @@ func stringInSlice(name string, bucketNames []string) bool {
 	return false
 }
 
-func (p *Input) readS3Object(svc s3iface.S3API, s3Infos []s3Info) {
+func (p *Input) readS3CreateEvents(svc s3iface.S3API, s3Infos []s3Info) {
 	if len(s3Infos) > 0 {
 		var wg sync.WaitGroup
 		numS3Infos := len(s3Infos)

From 991c29dd6502c19d6b83d039a33de660f4a16477 Mon Sep 17 00:00:00 2001
From: kaiyan-sheng
Date: Thu, 11 Jul 2019 14:36:37 -0600
Subject: [PATCH 23/59] Add id for each event

---
 .../docs/inputs/input-aws-s3.asciidoc |   4 +-
 x-pack/filebeat/input/s3/config.go    |  20 ++--
 x-pack/filebeat/input/s3/input.go     | 101 +++++++++++-------
 3 files changed, 75 insertions(+), 50 deletions(-)

diff --git a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc
index 8238b25a60d..f39026873a7 100644
--- a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc
+++ b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc
@@ -54,8 +54,8 @@ The duration (in seconds) that the received messages are hidden from subsequent
 retrieve requests after being retrieved by a ReceiveMessage request.
 This value needs to be much longer than the Filebeat collection frequency so
 that the SQS message is not reprocessed if reading the S3 log takes too long.
-The default visibility timeout for a message is 30 seconds. The minimum
-is 0 seconds. The maximum is 12 hours. Default: 300 seconds.
+The default visibility timeout for a message is 300 seconds. The minimum
+is 0 seconds. The maximum is 12 hours.
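
These bounds are enforced in code as well: the same patch adds a `validate` method to the input's config. Shown here for orientation (a direct mirror of the input.go hunk that follows, not new behavior; `errors` is github.com/pkg/errors, as used throughout the input):

func (c *config) validate() error {
	// Reject visibility timeouts outside the range SQS accepts:
	// 0 seconds up to 12 hours.
	if c.VisibilityTimeout < 0 || c.VisibilityTimeout.Hours() > 12 {
		return errors.New("visibilityTimeout is not defined within the " +
			"expected bounds")
	}

	if len(c.QueueURLs) == 0 {
		return errors.New("no SQS queueURLs are configured")
	}
	return nil
}
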
[float] ==== `aws credentials` diff --git a/x-pack/filebeat/input/s3/config.go b/x-pack/filebeat/input/s3/config.go index 74c3680b871..fe1e60aac2b 100644 --- a/x-pack/filebeat/input/s3/config.go +++ b/x-pack/filebeat/input/s3/config.go @@ -4,22 +4,26 @@ package s3 -import "github.com/elastic/beats/filebeat/harvester" +import ( + "time" + + "github.com/elastic/beats/filebeat/harvester" +) var defaultConfig = config{ ForwarderConfig: harvester.ForwarderConfig{ Type: "s3", }, ProfileName: "default", - VisibilityTimeout: 300, + VisibilityTimeout: 300 * time.Second, } type config struct { harvester.ForwarderConfig `config:",inline"` - QueueURLs []string `config:"queue_urls" validate:"nonzero,required"` - AccessKeyID string `config:"access_key_id"` - SecretAccessKey string `config:"secret_access_key"` - SessionToken string `config:"session_token"` - ProfileName string `config:"credential_profile_name"` - VisibilityTimeout int `config:"visibility_timeout"` + QueueURLs []string `config:"queue_urls" validate:"nonzero,required"` + AccessKeyID string `config:"access_key_id"` + SecretAccessKey string `config:"secret_access_key"` + SessionToken string `config:"session_token"` + ProfileName string `config:"credential_profile_name"` + VisibilityTimeout time.Duration `config:"visibility_timeout"` } diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 9d17f9db78d..d7e407f03bd 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -6,8 +6,11 @@ package s3 import ( "bufio" + "crypto/sha256" + "encoding/hex" "encoding/json" "io" + "strconv" "strings" "sync" "time" @@ -30,10 +33,9 @@ import ( "github.com/elastic/beats/libbeat/logp" ) -var ( - // Filebeat input name - inputName = "s3" +const inputName = "s3" +var ( // The maximum number of messages to return. Amazon SQS never returns more messages // than this value (however, fewer messages might be returned). 
maxNumberOfMessage int64 = 10 @@ -83,8 +85,9 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input return nil, err } - if len(config.QueueURLs) == 0 { - return nil, errors.Wrap(err, "no sqs queueURLs configured") + err = config.validate() + if err != nil { + return nil, errors.Wrapf(err, "validation for s3 input config failed: config = %s", config) } awsConfig, err := getAWSCredentials(config) @@ -103,6 +106,18 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input return p, nil } +func (c *config) validate() error { + if c.VisibilityTimeout < 0 || c.VisibilityTimeout.Hours() > 12 { + return errors.New("visibilityTimeout is not defined within the " + + "expected bounds") + } + + if len(c.QueueURLs) == 0 { + return errors.New("no SQS queueURLs are configured") + } + return nil +} + func getAWSCredentials(config config) (awssdk.Config, error) { // Check if accessKeyID and secretAccessKey is given from configuration if config.AccessKeyID != "" && config.SecretAccessKey != "" { @@ -132,7 +147,8 @@ func getAWSCredentials(config config) (awssdk.Config, error) { // Run runs the input func (p *Input) Run() { p.logger.Debugf("s3", "Run s3 input with queueURLs: %+v", p.config.QueueURLs) - visibilityTimeout := int64(p.config.VisibilityTimeout) + visibilityTimeout := int64(p.config.VisibilityTimeout.Seconds()) + for _, queueURL := range p.config.QueueURLs { regionName, err := getRegionFromQueueURL(queueURL) if err != nil { @@ -187,13 +203,6 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim numMessages := len(messages) wg.Add(numMessages) - // update message visibility timeout if it's taking longer than 1/2 of - // visibilityTimeout to make sure filebeat can finish reading - changeMessageVisibilityInput := &sqs.ChangeMessageVisibilityInput{ - QueueUrl: &queueURL, - VisibilityTimeout: &visibilityTimeout, - } - // process messages received from sqs for i := range messages { done := make(chan struct{}) @@ -204,7 +213,8 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim s3Infos, err := handleMessage(message) if err != nil { - p.logger.Error(err.Error()) + p.logger.Error(errors.Wrap(err, "handelMessage failed")) + return } // read from s3 object and create event for each log line @@ -225,11 +235,9 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim case <-done: return case <-time.After(time.Duration(visibilityTimeout/2) * time.Second): - // if half of the set visibilityTimeout passed and this is + // If half of the set visibilityTimeout passed and this is // still ongoing, then change visibility timeout. 
- changeMessageVisibilityInput.ReceiptHandle = message.ReceiptHandle - req := svcSQS.ChangeMessageVisibilityRequest(changeMessageVisibilityInput) - _, err := req.Send() + err := changeVisibilityTimeout(queueURL, visibilityTimeout, svcSQS, message.ReceiptHandle) if err != nil { p.logger.Error(errors.Wrap(err, "change message visibility failed")) } @@ -240,6 +248,16 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim wg.Wait() } +func changeVisibilityTimeout(queueURL string, visibilityTimeout int64, svc *sqs.SQS, receiptHandle *string) error { + req := svc.ChangeMessageVisibilityRequest(&sqs.ChangeMessageVisibilityInput{ + QueueUrl: &queueURL, + VisibilityTimeout: &visibilityTimeout, + ReceiptHandle: receiptHandle, + }) + _, err := req.Send() + return err +} + func getRegionFromQueueURL(queueURL string) (string, error) { // get region from queueURL // Example: https://sqs.us-east-1.amazonaws.com/627959692251/test-s3-logs @@ -278,21 +296,6 @@ func handleMessage(m sqs.Message) ([]s3Info, error) { return s3Infos, nil } -// stringInSlice checks if a string is already exists in list -// If there is no bucketNames configured, then collect all. -func stringInSlice(name string, bucketNames []string) bool { - if bucketNames == nil || len(bucketNames) == 0 { - return true - } - - for _, v := range bucketNames { - if v == name { - return true - } - } - return false -} - func (p *Input) readS3CreateEvents(svc s3iface.S3API, s3Infos []s3Info) { if len(s3Infos) > 0 { var wg sync.WaitGroup @@ -307,7 +310,7 @@ func (p *Input) readS3CreateEvents(svc s3iface.S3API, s3Infos []s3Info) { // read from s3 object reader, err := bufferedIORead(svc, s3Info) if err != nil { - p.logger.Error(errors.Wrap(err, "s3 get object request failed")) + p.logger.Error(errors.Wrap(err, "bufferedIORead failed")) return } @@ -320,17 +323,25 @@ func (p *Input) readS3CreateEvents(svc s3iface.S3API, s3Infos []s3Info) { if err != nil { if err == io.EOF { + // create event for last line offset += len([]byte(log)) - p.forwardEvent(createEvent(log, offset, s3Info)) + err = p.forwardEvent(createEvent(log, offset, s3Info)) + if err != nil { + p.logger.Error(errors.Wrap(err, "forwardEvent failed")) + } break - } else { - p.logger.Error(errors.Wrap(err, "ReadString failed")) } + + p.logger.Error(errors.Wrap(err, "ReadString failed")) + break } // create event per log line offset += len([]byte(log)) - p.forwardEvent(createEvent(log, offset, s3Info)) + err = p.forwardEvent(createEvent(log, offset, s3Info)) + if err != nil { + p.logger.Error(errors.Wrap(err, "forwardEvent failed")) + } } }(s3Infos[i]) } @@ -353,13 +364,14 @@ func bufferedIORead(svc s3iface.S3API, s3Info s3Info) (*bufio.Reader, error) { return bufio.NewReader(resp.Body), nil } -func (p *Input) forwardEvent(event *beat.Event) { +func (p *Input) forwardEvent(event *beat.Event) error { forwarder := harvester.NewForwarder(p.outlet) d := &util.Data{Event: *event} err := forwarder.Send(d) if err != nil { - p.logger.Error(errors.Wrap(err, "forwarder send failed")) + return errors.Wrap(err, "forwarder send failed") } + return nil } func deleteMessage(queueURL string, messagesReceiptHandle string, svcSQS *sqs.SQS) error { @@ -395,6 +407,7 @@ func createEvent(log string, offset int, s3Info s3Info) *beat.Event { }, } return &beat.Event{ + Meta: common.MapStr{"id": makeEventID(s3Info, offset)}, Timestamp: time.Now(), Fields: f, } @@ -403,3 +416,11 @@ func createEvent(log string, offset int, s3Info s3Info) *beat.Event { func constructObjectURL(info s3Info) 
string { return "https://" + info.name + ".s3-" + info.region + ".amazonaws.com/" + info.key } + +// makeTopicID returns a short sha256 hash of the bucket name + object key name + offset. +func makeEventID(s3Info s3Info, offset int) string { + h := sha256.New() + h.Write([]byte(s3Info.name + s3Info.key + "-" + strconv.Itoa(offset))) + prefix := hex.EncodeToString(h.Sum(nil)) + return prefix[:10] +} From 12ee91cf3100ac9963843c574ae800aadbeb2a99 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Fri, 12 Jul 2019 14:01:11 -0600 Subject: [PATCH 24/59] add sqsMessage struct to decode resp body from sqs --- x-pack/filebeat/input/s3/input.go | 48 +++++++++++++++++--------- x-pack/filebeat/input/s3/input_test.go | 5 +-- 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index d7e407f03bd..a5aa4396027 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -67,6 +67,24 @@ type s3Info struct { name string key string region string + arn string +} + +type sqsMessage struct { + Records []struct { + EventSource string `json:"eventSource"` + AwsRegion string `json:"awsRegion"` + EventName string `json:"eventName"` + S3 struct { + Bucket struct { + Name string `json:"name"` + Arn string `json:"arn"` + } `json:"bucket"` + Object struct { + Key string `json:"key"` + } `json:"object"` + } `json:"s3"` + } `json:"Records"` } // NewInput creates a new s3 input @@ -270,27 +288,21 @@ func getRegionFromQueueURL(queueURL string) (string, error) { // handle message func handleMessage(m sqs.Message) ([]s3Info, error) { - msg := map[string]interface{}{} + msg := sqsMessage{} err := json.Unmarshal([]byte(*m.Body), &msg) if err != nil { return nil, errors.Wrap(err, "json unmarshal sqs message body failed") } var s3Infos []s3Info - records := msg["Records"].([]interface{}) - for _, record := range records { - recordMap := record.(map[string]interface{}) - if recordMap["eventSource"] == "aws:s3" && recordMap["eventName"] == "ObjectCreated:Put" { - s3Info := s3Info{} - s3Info.region = recordMap["awsRegion"].(string) - s3Record := recordMap["s3"].(map[string]interface{}) - - bucketInfo := s3Record["bucket"].(map[string]interface{}) - s3Info.name = bucketInfo["name"].(string) - - objectInfo := s3Record["object"].(map[string]interface{}) - s3Info.key = objectInfo["key"].(string) - s3Infos = append(s3Infos, s3Info) + for _, record := range msg.Records { + if record.EventSource == "aws:s3" && record.EventName == "ObjectCreated:Put" { + s3Infos = append(s3Infos, s3Info{ + region: record.AwsRegion, + name: record.S3.Bucket.Name, + key: record.S3.Object.Key, + arn: record.S3.Bucket.Arn, + }) } } return s3Infos, nil @@ -397,8 +409,10 @@ func createEvent(log string, offset int, s3Info s3Info) *beat.Event { }, "aws": common.MapStr{ "s3": common.MapStr{ - "bucket_name": s3Info.name, - "object_key": s3Info.key, + "bucket": common.MapStr{ + "name": s3Info.name, + "arn": s3Info.arn}, + "object.key": s3Info.key, }, }, "cloud": common.MapStr{ diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index f366c009223..58e1c66a0dc 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -127,6 +127,7 @@ func TestCreateEvent(t *testing.T) { name: "test-s3-ks", key: "log2019-06-21-16-16-54", region: "us-west-1", + arn: "arn:aws:s3:::test-s3-ks", } reader, err := bufferedIORead(mockSvc, s3Info) @@ -149,11 +150,11 @@ func TestCreateEvent(t *testing.T) { 
assert.Equal(t, 2, len(events)) - bucketName, err := events[0].Fields.GetValue("aws.s3.bucket_name") + bucketName, err := events[0].Fields.GetValue("aws.s3.bucket.name") assert.NoError(t, err) assert.Equal(t, "test-s3-ks", bucketName.(string)) - objectKey, err := events[0].Fields.GetValue("aws.s3.object_key") + objectKey, err := events[0].Fields.GetValue("aws.s3.object.key") assert.NoError(t, err) assert.Equal(t, "log2019-06-21-16-16-54", objectKey.(string)) From af21325aa17fbbd6c96aee0db81c8781156adcbb Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Sun, 14 Jul 2019 21:21:04 -0600 Subject: [PATCH 25/59] Change event id to hash + offset --- x-pack/filebeat/input/s3/input.go | 17 +++++++------- x-pack/filebeat/input/s3/input_test.go | 31 ++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index a5aa4396027..70feaa80b36 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -9,8 +9,8 @@ import ( "crypto/sha256" "encoding/hex" "encoding/json" + "fmt" "io" - "strconv" "strings" "sync" "time" @@ -315,6 +315,7 @@ func (p *Input) readS3CreateEvents(svc s3iface.S3API, s3Infos []s3Info) { wg.Add(numS3Infos) for i := range s3Infos { + objectHash := s3ObjectHash(s3Infos[i]) go func(s3Info s3Info) { // launch goroutine to handle each message defer wg.Done() @@ -337,7 +338,7 @@ func (p *Input) readS3CreateEvents(svc s3iface.S3API, s3Infos []s3Info) { if err == io.EOF { // create event for last line offset += len([]byte(log)) - err = p.forwardEvent(createEvent(log, offset, s3Info)) + err = p.forwardEvent(createEvent(log, offset, s3Info, objectHash)) if err != nil { p.logger.Error(errors.Wrap(err, "forwardEvent failed")) } @@ -350,7 +351,7 @@ func (p *Input) readS3CreateEvents(svc s3iface.S3API, s3Infos []s3Info) { // create event per log line offset += len([]byte(log)) - err = p.forwardEvent(createEvent(log, offset, s3Info)) + err = p.forwardEvent(createEvent(log, offset, s3Info, objectHash)) if err != nil { p.logger.Error(errors.Wrap(err, "forwardEvent failed")) } @@ -400,7 +401,7 @@ func deleteMessage(queueURL string, messagesReceiptHandle string, svcSQS *sqs.SQ return nil } -func createEvent(log string, offset int, s3Info s3Info) *beat.Event { +func createEvent(log string, offset int, s3Info s3Info, objectHash string) *beat.Event { f := common.MapStr{ "message": log, "log": common.MapStr{ @@ -421,7 +422,7 @@ func createEvent(log string, offset int, s3Info s3Info) *beat.Event { }, } return &beat.Event{ - Meta: common.MapStr{"id": makeEventID(s3Info, offset)}, + Meta: common.MapStr{"id": objectHash + fmt.Sprintf("%012d", offset)}, Timestamp: time.Now(), Fields: f, } @@ -431,10 +432,10 @@ func constructObjectURL(info s3Info) string { return "https://" + info.name + ".s3-" + info.region + ".amazonaws.com/" + info.key } -// makeTopicID returns a short sha256 hash of the bucket name + object key name + offset. -func makeEventID(s3Info s3Info, offset int) string { +// s3ObjectHash returns a short sha256 hash of the bucket name + object key name. 
+func s3ObjectHash(s3Info s3Info) string { h := sha256.New() - h.Write([]byte(s3Info.name + s3Info.key + "-" + strconv.Itoa(offset))) + h.Write([]byte(s3Info.name + s3Info.key)) prefix := hex.EncodeToString(h.Sum(nil)) return prefix[:10] } diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index 58e1c66a0dc..84659b41128 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -6,6 +6,7 @@ package s3 import ( "bytes" + "fmt" "io" "io/ioutil" "testing" @@ -129,6 +130,7 @@ func TestCreateEvent(t *testing.T) { region: "us-west-1", arn: "arn:aws:s3:::test-s3-ks", } + s3ObjectHash := s3ObjectHash(s3Info) reader, err := bufferedIORead(mockSvc, s3Info) assert.NoError(t, err) @@ -139,12 +141,12 @@ func TestCreateEvent(t *testing.T) { break } if err == io.EOF { - event := createEvent(log, len([]byte(log)), s3Info) + event := createEvent(log, len([]byte(log)), s3Info, s3ObjectHash) events = append(events, event) break } - event := createEvent(log, len([]byte(log)), s3Info) + event := createEvent(log, len([]byte(log)), s3Info, s3ObjectHash) events = append(events, event) } @@ -205,3 +207,28 @@ func TestConstructObjectURL(t *testing.T) { }) } } + +func TestConvertOffsetToString(t *testing.T) { + cases := []struct { + offset int + expectedString string + }{ + { + 123, + "000000000123", + }, + { + 123456, + "000000123456", + }, + { + 123456789123, + "123456789123", + }, + } + for _, c := range cases { + output := fmt.Sprintf("%012d", c.offset) + assert.Equal(t, c.expectedString, output) + } + +} From e841d36603eb7090d28c088d2b63208dcabf1fa6 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 15 Jul 2019 11:34:29 -0600 Subject: [PATCH 26/59] Add error channel to handleS3Objects --- x-pack/filebeat/input/s3/input.go | 46 +++++++++++++++++--------- x-pack/filebeat/input/s3/input_test.go | 2 +- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 70feaa80b36..8a8ee03cb55 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -229,14 +229,24 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim defer wg.Done() defer close(done) - s3Infos, err := handleMessage(message) + s3Infos, err := handleSQSMessage(message) if err != nil { p.logger.Error(errors.Wrap(err, "handelMessage failed")) return } // read from s3 object and create event for each log line - p.readS3CreateEvents(svcS3, s3Infos) + errC := p.handleS3Objects(svcS3, s3Infos) + err = <-errC + if err != nil { + // Change visibility timeout to 0 so this message comes back to + // SQS queue immediately. 
+ err := changeVisibilityTimeout(queueURL, 0, svcSQS, message.ReceiptHandle) + if err != nil { + p.logger.Error(errors.Wrap(err, "change message visibility failed")) + } + return + } // delete message after events are sent err = deleteMessage(queueURL, *message.ReceiptHandle, svcSQS) @@ -287,7 +297,7 @@ func getRegionFromQueueURL(queueURL string) (string, error) { } // handle message -func handleMessage(m sqs.Message) ([]s3Info, error) { +func handleSQSMessage(m sqs.Message) ([]s3Info, error) { msg := sqsMessage{} err := json.Unmarshal([]byte(*m.Body), &msg) if err != nil { @@ -308,22 +318,22 @@ func handleMessage(m sqs.Message) ([]s3Info, error) { return s3Infos, nil } -func (p *Input) readS3CreateEvents(svc s3iface.S3API, s3Infos []s3Info) { +func (p *Input) handleS3Objects(svc s3iface.S3API, s3Infos []s3Info) <-chan error { + errC := make(chan error) + if len(s3Infos) > 0 { var wg sync.WaitGroup - numS3Infos := len(s3Infos) - wg.Add(numS3Infos) + wg.Add(len(s3Infos)) for i := range s3Infos { objectHash := s3ObjectHash(s3Infos[i]) go func(s3Info s3Info) { - // launch goroutine to handle each message defer wg.Done() // read from s3 object reader, err := bufferedIORead(svc, s3Info) if err != nil { - p.logger.Error(errors.Wrap(err, "bufferedIORead failed")) + errC <- errors.Wrap(err, "bufferedIORead failed") return } @@ -340,26 +350,30 @@ func (p *Input) readS3CreateEvents(svc s3iface.S3API, s3Infos []s3Info) { offset += len([]byte(log)) err = p.forwardEvent(createEvent(log, offset, s3Info, objectHash)) if err != nil { - p.logger.Error(errors.Wrap(err, "forwardEvent failed")) + errC <- errors.Wrap(err, "forwardEvent failed") } - break + return } - p.logger.Error(errors.Wrap(err, "ReadString failed")) - break + errC <- errors.Wrap(err, "ReadString failed") + return } // create event per log line offset += len([]byte(log)) err = p.forwardEvent(createEvent(log, offset, s3Info, objectHash)) if err != nil { - p.logger.Error(errors.Wrap(err, "forwardEvent failed")) + errC <- errors.Wrap(err, "forwardEvent failed") + return } } }(s3Infos[i]) } wg.Wait() } + + close(errC) + return errC } func bufferedIORead(svc s3iface.S3API, s3Info s3Info) (*bufio.Reader, error) { @@ -422,7 +436,7 @@ func createEvent(log string, offset int, s3Info s3Info, objectHash string) *beat }, } return &beat.Event{ - Meta: common.MapStr{"id": objectHash + fmt.Sprintf("%012d", offset)}, + Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, Timestamp: time.Now(), Fields: f, } @@ -432,10 +446,10 @@ func constructObjectURL(info s3Info) string { return "https://" + info.name + ".s3-" + info.region + ".amazonaws.com/" + info.key } -// s3ObjectHash returns a short sha256 hash of the bucket name + object key name. +// s3ObjectHash returns a short sha256 hash of the bucket arn + object key name. 
func s3ObjectHash(s3Info s3Info) string { h := sha256.New() - h.Write([]byte(s3Info.name + s3Info.key)) + h.Write([]byte(s3Info.arn + s3Info.key)) prefix := hex.EncodeToString(h.Sum(nil)) return prefix[:10] } diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index 84659b41128..7768b98637c 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -90,7 +90,7 @@ func TestHandleMessage(t *testing.T) { for _, c := range cases { t.Run(c.title, func(t *testing.T) { - s3Info, err := handleMessage(c.message) + s3Info, err := handleSQSMessage(c.message) assert.NoError(t, err) assert.Equal(t, len(c.expectedS3Infos), len(s3Info)) if len(s3Info) > 0 { From 8744cbfa1e1f4bb716b84347a668fad833b048a7 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 15 Jul 2019 17:33:08 -0600 Subject: [PATCH 27/59] Move deleteMessage after done --- x-pack/filebeat/input/s3/input.go | 42 +++++++++++++------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 8a8ee03cb55..dc1e73d54f8 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -224,10 +224,12 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim // process messages received from sqs for i := range messages { done := make(chan struct{}) + errC := make(chan error) // launch goroutine to handle each message from sqs go func(message sqs.Message) { defer wg.Done() defer close(done) + defer close(errC) s3Infos, err := handleSQSMessage(message) if err != nil { @@ -236,23 +238,7 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim } // read from s3 object and create event for each log line - errC := p.handleS3Objects(svcS3, s3Infos) - err = <-errC - if err != nil { - // Change visibility timeout to 0 so this message comes back to - // SQS queue immediately. 
- err := changeVisibilityTimeout(queueURL, 0, svcSQS, message.ReceiptHandle) - if err != nil { - p.logger.Error(errors.Wrap(err, "change message visibility failed")) - } - return - } - - // delete message after events are sent - err = deleteMessage(queueURL, *message.ReceiptHandle, svcSQS) - if err != nil { - p.logger.Error(errors.Wrap(err, "deleteMessages failed")) - } + p.handleS3Objects(svcS3, s3Infos, errC) }(messages[i]) go func(message sqs.Message) { @@ -261,6 +247,17 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim case <-p.close: return case <-done: + err := deleteMessage(queueURL, *message.ReceiptHandle, svcSQS) + if err != nil { + p.logger.Error(errors.Wrap(err, "deleteMessages failed")) + } + return + case <-errC: + err := changeVisibilityTimeout(queueURL, 0, svcSQS, message.ReceiptHandle) + if err != nil { + p.logger.Error(errors.Wrap(err, "change message visibility failed")) + } + p.logger.Info("message visibility updated to 0") return case <-time.After(time.Duration(visibilityTimeout/2) * time.Second): // If half of the set visibilityTimeout passed and this is @@ -269,6 +266,7 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim if err != nil { p.logger.Error(errors.Wrap(err, "change message visibility failed")) } + p.logger.Infof("message visibility updated to %s", visibilityTimeout) } } }(messages[i]) @@ -318,17 +316,16 @@ func handleSQSMessage(m sqs.Message) ([]s3Info, error) { return s3Infos, nil } -func (p *Input) handleS3Objects(svc s3iface.S3API, s3Infos []s3Info) <-chan error { - errC := make(chan error) - +func (p *Input) handleS3Objects(svc s3iface.S3API, s3Infos []s3Info, errC chan error) { if len(s3Infos) > 0 { var wg sync.WaitGroup wg.Add(len(s3Infos)) for i := range s3Infos { - objectHash := s3ObjectHash(s3Infos[i]) + go func(s3Info s3Info) { defer wg.Done() + objectHash := s3ObjectHash(s3Info) // read from s3 object reader, err := bufferedIORead(svc, s3Info) @@ -371,9 +368,6 @@ func (p *Input) handleS3Objects(svc s3iface.S3API, s3Infos []s3Info) <-chan erro } wg.Wait() } - - close(errC) - return errC } func bufferedIORead(svc s3iface.S3API, s3Info s3Info) (*bufio.Reader, error) { From 9fb151e79861debb19f3159c375de7f28ec38650 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 15 Jul 2019 18:35:11 -0600 Subject: [PATCH 28/59] Replace done channel with errC channel --- x-pack/filebeat/input/s3/input.go | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index dc1e73d54f8..4ecf341c635 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -105,7 +105,7 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input err = config.validate() if err != nil { - return nil, errors.Wrapf(err, "validation for s3 input config failed: config = %s", config) + return nil, errors.Wrapf(err, "validation for s3 input config failed: config = %v", config) } awsConfig, err := getAWSCredentials(config) @@ -223,12 +223,10 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim // process messages received from sqs for i := range messages { - done := make(chan struct{}) errC := make(chan error) // launch goroutine to handle each message from sqs go func(message sqs.Message) { defer wg.Done() - defer close(done) defer close(errC) s3Infos, err := handleSQSMessage(message) @@ -246,18 +244,19 @@ func (p *Input) 
processor(queueURL string, messages []sqs.Message, visibilityTim
 			select {
 			case <-p.close:
 				return
-			case <-done:
-				err := deleteMessage(queueURL, *message.ReceiptHandle, svcSQS)
-				if err != nil {
-					p.logger.Error(errors.Wrap(err, "deleteMessages failed"))
-				}
-				return
-			case <-errC:
-				err := changeVisibilityTimeout(queueURL, 0, svcSQS, message.ReceiptHandle)
-				if err != nil {
-					p.logger.Error(errors.Wrap(err, "change message visibility failed"))
-				}
-				p.logger.Info("message visibility updated to 0")
+			case err := <-errC:
+				if err != nil {
+					err := changeVisibilityTimeout(queueURL, visibilityTimeout, svcSQS, message.ReceiptHandle)
+					if err != nil {
+						p.logger.Error(errors.Wrap(err, "change message visibility failed"))
+					}
+					p.logger.Infof("message visibility updated to %v", visibilityTimeout)
+				} else {
+					err := deleteMessage(queueURL, *message.ReceiptHandle, svcSQS)
+					if err != nil {
+						p.logger.Error(errors.Wrap(err, "deleteMessages failed"))
+					}
+				}
 				return
 			case <-time.After(time.Duration(visibilityTimeout/2) * time.Second):
 				// If half of the set visibilityTimeout passed and this is
@@ -265,7 +265,7 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim
 				if err != nil {
 					p.logger.Error(errors.Wrap(err, "change message visibility failed"))
 				}
-				p.logger.Infof("message visibility updated to %s", visibilityTimeout)
+				p.logger.Infof("message visibility updated to %v", visibilityTimeout)
 			}
 		}
 	}(messages[i])

From 824689a733763d1bfe26e143d12bae50adae2146 Mon Sep 17 00:00:00 2001
From: kaiyan-sheng
Date: Tue, 16 Jul 2019 10:54:17 -0600
Subject: [PATCH 29/59] Update documentation

---
 .../docs/inputs/input-aws-s3.asciidoc | 14 +++++---
 x-pack/filebeat/input/s3/input.go     | 33 ++++++++++----------
 2 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc
index f39026873a7..379f8ed75b0 100644
--- a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc
+++ b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc
@@ -12,10 +12,16 @@ beta[]
 
 Use the `s3` input to retrieve logs from S3 objects that are pointed by messages
-from specific SQS queues.
-
-This input can, for example, be used to receive S3 server access logs to monitor
-detailed records for the requests that are made to a bucket.
+from specific SQS queues. This input can, for example, be used to receive S3
+server access logs to monitor detailed records for the requests that are made to
+a bucket.
+
+When an S3 object referenced by an SQS message is being processed and half of
+the configured visibility timeout has passed while the processing is still
+ongoing, the visibility timeout of that SQS message is reset so the message
+does not go back to the queue in the middle of processing. If an error occurs
+while processing the S3 object, processing stops and the SQS message is
+returned to the queue.
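
The paragraph above compresses the whole message lifecycle into two sentences. A hedged sketch of the decision it describes, with `deleteMessage` and `resetVisibility` standing in for the input's real SQS calls:

// settleMessage sketches the outcome handling described above: a fully
// processed message is deleted from the queue, while a failed one is left
// there so it becomes visible again and is retried later.
func settleMessage(procErr error, deleteMessage, resetVisibility func() error) error {
	if procErr != nil {
		// Processing failed: keep the message; it returns to the queue
		// once its visibility timeout expires.
		return resetVisibility()
	}
	// All log lines were forwarded as events: remove the message for good.
	return deleteMessage()
}
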
Example configuration 1: diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 4ecf341c635..c393d3be761 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -6,6 +6,7 @@ package s3 import ( "bufio" + "context" "crypto/sha256" "encoding/hex" "encoding/json" @@ -188,7 +189,7 @@ func (p *Input) Run() { VisibilityTimeout: &visibilityTimeout, WaitTimeSeconds: &waitTimeSecond, }) - output, err := req.Send() + output, err := req.Send(context.Background()) if err != nil { p.logger.Error("failed to receive message from SQS:", err) continue @@ -216,7 +217,7 @@ func (p *Input) Wait() { p.Stop() } -func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTimeout int64, svcS3 *s3.S3, svcSQS *sqs.SQS) { +func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTimeout int64, svcS3 *s3.Client, svcSQS *sqs.Client) { var wg sync.WaitGroup numMessages := len(messages) wg.Add(numMessages) @@ -246,11 +247,12 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim return case err := <-errC: if err != nil { + p.logger.Warnf("Processing message failed: %v", err) err := changeVisibilityTimeout(queueURL, visibilityTimeout, svcSQS, message.ReceiptHandle) if err != nil { p.logger.Error(errors.Wrap(err, "change message visibility failed")) } - p.logger.Infof("message visibility updated to %v", visibilityTimeout) + p.logger.Warnf("Message visibility timeout updated to %v", visibilityTimeout) } else { err := deleteMessage(queueURL, *message.ReceiptHandle, svcSQS) if err != nil { @@ -265,7 +267,7 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim if err != nil { p.logger.Error(errors.Wrap(err, "change message visibility failed")) } - p.logger.Infof("message visibility updated to %v", visibilityTimeout) + p.logger.Infof("Message visibility timeout updated to %v", visibilityTimeout) } } }(messages[i]) @@ -273,13 +275,13 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim wg.Wait() } -func changeVisibilityTimeout(queueURL string, visibilityTimeout int64, svc *sqs.SQS, receiptHandle *string) error { +func changeVisibilityTimeout(queueURL string, visibilityTimeout int64, svc *sqs.Client, receiptHandle *string) error { req := svc.ChangeMessageVisibilityRequest(&sqs.ChangeMessageVisibilityInput{ QueueUrl: &queueURL, VisibilityTimeout: &visibilityTimeout, ReceiptHandle: receiptHandle, }) - _, err := req.Send() + _, err := req.Send(context.Background()) return err } @@ -315,13 +317,12 @@ func handleSQSMessage(m sqs.Message) ([]s3Info, error) { return s3Infos, nil } -func (p *Input) handleS3Objects(svc s3iface.S3API, s3Infos []s3Info, errC chan error) { +func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info, errC chan error) { if len(s3Infos) > 0 { var wg sync.WaitGroup wg.Add(len(s3Infos)) for i := range s3Infos { - go func(s3Info s3Info) { defer wg.Done() objectHash := s3ObjectHash(s3Info) @@ -346,12 +347,12 @@ func (p *Input) handleS3Objects(svc s3iface.S3API, s3Infos []s3Info, errC chan e offset += len([]byte(log)) err = p.forwardEvent(createEvent(log, offset, s3Info, objectHash)) if err != nil { - errC <- errors.Wrap(err, "forwardEvent failed") + errC <- errors.Wrapf(err, "forwardEvent failed for %v", s3Info.key) } return } - errC <- errors.Wrap(err, "ReadString failed") + errC <- errors.Wrapf(err, "ReadString failed for %v", s3Info.key) return } @@ -359,7 +360,7 @@ func (p *Input) handleS3Objects(svc 
s3iface.S3API, s3Infos []s3Info, errC chan e offset += len([]byte(log)) err = p.forwardEvent(createEvent(log, offset, s3Info, objectHash)) if err != nil { - errC <- errors.Wrap(err, "forwardEvent failed") + errC <- errors.Wrapf(err, "forwardEvent failed for %v", s3Info.key) return } } @@ -369,16 +370,16 @@ func (p *Input) handleS3Objects(svc s3iface.S3API, s3Infos []s3Info, errC chan e } } -func bufferedIORead(svc s3iface.S3API, s3Info s3Info) (*bufio.Reader, error) { +func bufferedIORead(svc s3iface.ClientAPI, s3Info s3Info) (*bufio.Reader, error) { s3GetObjectInput := &s3.GetObjectInput{ Bucket: awssdk.String(s3Info.name), Key: awssdk.String(s3Info.key), } req := svc.GetObjectRequest(s3GetObjectInput) - resp, err := req.Send() + resp, err := req.Send(context.Background()) if err != nil { - return nil, errors.Wrap(err, "s3 get object request failed") + return nil, errors.Wrapf(err, "s3 get object request failed %v", s3Info.key) } return bufio.NewReader(resp.Body), nil @@ -394,14 +395,14 @@ func (p *Input) forwardEvent(event *beat.Event) error { return nil } -func deleteMessage(queueURL string, messagesReceiptHandle string, svcSQS *sqs.SQS) error { +func deleteMessage(queueURL string, messagesReceiptHandle string, svcSQS *sqs.Client) error { deleteMessageInput := &sqs.DeleteMessageInput{ QueueUrl: awssdk.String(queueURL), ReceiptHandle: awssdk.String(messagesReceiptHandle), } req := svcSQS.DeleteMessageRequest(deleteMessageInput) - _, err := req.Send() + _, err := req.Send(context.Background()) if err != nil { return errors.Wrap(err, "DeleteMessageRequest failed") } From 3f4bd22449be1386a8d28b57c7946d8f1f136343 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Tue, 16 Jul 2019 11:19:55 -0600 Subject: [PATCH 30/59] Fix unit test --- x-pack/filebeat/input/s3/input_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index 7768b98637c..75d292d1915 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -22,7 +22,7 @@ import ( // MockS3Client struct is used for unit tests. 
type MockS3Client struct { - s3iface.S3API + s3iface.ClientAPI } var ( From 1314fa3b8c45fb3e89e02f0d286ac0b7b43a08ae Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Tue, 16 Jul 2019 13:17:42 -0600 Subject: [PATCH 31/59] Fix unit test --- x-pack/filebeat/input/s3/input_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index 75d292d1915..4927965b5f7 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -9,6 +9,7 @@ import ( "fmt" "io" "io/ioutil" + "net/http" "testing" awssdk "github.com/aws/aws-sdk-go-v2/aws" @@ -38,11 +39,13 @@ var ( func (m *MockS3Client) GetObjectRequest(input *s3.GetObjectInput) s3.GetObjectRequest { logBody := ioutil.NopCloser(bytes.NewReader([]byte(s3LogString1 + s3LogString2))) + httpReq, _ := http.NewRequest("", "", nil) return s3.GetObjectRequest{ Request: &awssdk.Request{ Data: &s3.GetObjectOutput{ Body: logBody, }, + HTTPRequest: httpReq, }, } } From ffe44ad1b79eb57dcaa4ba2701127c1bea277f43 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Thu, 18 Jul 2019 15:24:52 -0600 Subject: [PATCH 32/59] Move sendKeepAlive into separate function --- filebeat/docs/fields.asciidoc | 2 +- x-pack/filebeat/input/s3/_meta/fields.yml | 2 +- x-pack/filebeat/input/s3/fields.go | 2 +- x-pack/filebeat/input/s3/input.go | 73 +++++++++++------------ 4 files changed, 38 insertions(+), 41 deletions(-) diff --git a/filebeat/docs/fields.asciidoc b/filebeat/docs/fields.asciidoc index 8f738732eb0..3b0f91db156 100644 --- a/filebeat/docs/fields.asciidoc +++ b/filebeat/docs/fields.asciidoc @@ -990,7 +990,7 @@ alias to: destination.address [[exported-fields-aws]] == aws fields -Aws fields from s3 input. +AWS fields from s3 input. diff --git a/x-pack/filebeat/input/s3/_meta/fields.yml b/x-pack/filebeat/input/s3/_meta/fields.yml index 30c6766fc9e..ff370a8e924 100644 --- a/x-pack/filebeat/input/s3/_meta/fields.yml +++ b/x-pack/filebeat/input/s3/_meta/fields.yml @@ -1,7 +1,7 @@ - key: aws title: "aws" description: > - Aws fields from s3 input. + AWS fields from s3 input. release: beta fields: - name: s3 diff --git a/x-pack/filebeat/input/s3/fields.go b/x-pack/filebeat/input/s3/fields.go index ee9b3dccda4..f0d24e06839 100644 --- a/x-pack/filebeat/input/s3/fields.go +++ b/x-pack/filebeat/input/s3/fields.go @@ -19,5 +19,5 @@ func init() { // AssetS3 returns asset data. // This is the base64 encoded gzipped contents of input/s3. 
func AssetS3() string { - return "eJyskL1uwzAMhHc9xSF7snjTUKAv0KUPENDWOVH9I0Oka/jtCzluYaAtupSDAJKn+w48o+PqIYs6wKL19DjJoicHBGqT42QxjR5PDgCeF0Ub2QdFm9MArRDHabaLAzJ7itKjponDrvPbvzNGGeih1dYCtk70uOU0T/vkB1qp16oYizEgjm3KgxTFZd8fGUdOPTcd7Vqar90ntOO6pBwO81/QpV5kIFILu7NEefjC7lKeqOjTDZmWI98ZtptcvoVJ9Rsbu3Zc/zXLw/aPLB8BAAD//7PMioY=" + return "eJyskL1OxDAQhHs/xej6S5POBRIvQHMF5WkTT+5MfhzZGyK/PXIuoEiAaNjC0u6O5xvtGT2zhazJAOp1oMVJ1nQygGNqo5/Vh8niyQDA8+sFnefgEroYRqQafpoXrQwQOVASLRqqGOw6u/07Y5KRFqneWkDzTItbDMu8T36glbrUxViUDn7qQhylKKp9f2QcOc3S9tRrab52n9CeeQ3RHea/oEu9yEiEDnpnifLwhd6lPD5hCDdEavR8p9tuUn0LE5o3tnrtmf81y8P2jywfAQAA//9H7IpG" } diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index c393d3be761..1835121105a 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -104,11 +104,6 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input return nil, err } - err = config.validate() - if err != nil { - return nil, errors.Wrapf(err, "validation for s3 input config failed: config = %v", config) - } - awsConfig, err := getAWSCredentials(config) if err != nil { return nil, errors.Wrap(err, "getAWSCredentials failed") @@ -125,7 +120,7 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input return p, nil } -func (c *config) validate() error { +func (c *config) Validate() error { if c.VisibilityTimeout < 0 || c.VisibilityTimeout.Hours() > 12 { return errors.New("visibilityTimeout is not defined within the " + "expected bounds") @@ -165,13 +160,13 @@ func getAWSCredentials(config config) (awssdk.Config, error) { // Run runs the input func (p *Input) Run() { - p.logger.Debugf("s3", "Run s3 input with queueURLs: %+v", p.config.QueueURLs) + p.logger.Debugf("Run s3 input with queueURLs: %v", p.config.QueueURLs) visibilityTimeout := int64(p.config.VisibilityTimeout.Seconds()) for _, queueURL := range p.config.QueueURLs { regionName, err := getRegionFromQueueURL(queueURL) if err != nil { - p.logger.Errorf("failed to get region name from queueURL: %s", queueURL) + p.logger.Errorf("failed to get region name from queueURL: %v", queueURL) continue } @@ -240,39 +235,41 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim p.handleS3Objects(svcS3, s3Infos, errC) }(messages[i]) - go func(message sqs.Message) { - for { - select { - case <-p.close: - return - case err := <-errC: - if err != nil { - p.logger.Warnf("Processing message failed: %v", err) - err := changeVisibilityTimeout(queueURL, visibilityTimeout, svcSQS, message.ReceiptHandle) - if err != nil { - p.logger.Error(errors.Wrap(err, "change message visibility failed")) - } - p.logger.Warnf("Message visibility timeout updated to %v", visibilityTimeout) - } else { - err := deleteMessage(queueURL, *message.ReceiptHandle, svcSQS) - if err != nil { - p.logger.Error(errors.Wrap(err, "deleteMessages failed")) - } - } - return - case <-time.After(time.Duration(visibilityTimeout/2) * time.Second): - // If half of the set visibilityTimeout passed and this is - // still ongoing, then change visibility timeout. 
- err := changeVisibilityTimeout(queueURL, visibilityTimeout, svcSQS, message.ReceiptHandle) - if err != nil { - p.logger.Error(errors.Wrap(err, "change message visibility failed")) - } - p.logger.Infof("Message visibility timeout updated to %v", visibilityTimeout) + go p.sendKeepAlive(svcSQS, messages[i], queueURL, visibilityTimeout, errC) + } + wg.Wait() +} + +func (p *Input) sendKeepAlive(svcSQS *sqs.Client, message sqs.Message, queueURL string, visibilityTimeout int64, errC chan error) { + for { + select { + case <-p.close: + return + case err := <-errC: + if err != nil { + p.logger.Warnf("Processing message failed: %v", err) + err := changeVisibilityTimeout(queueURL, visibilityTimeout, svcSQS, message.ReceiptHandle) + if err != nil { + p.logger.Error(errors.Wrap(err, "change message visibility failed")) + } + p.logger.Warnf("Message visibility timeout updated to %v", visibilityTimeout) + } else { + err := deleteMessage(queueURL, *message.ReceiptHandle, svcSQS) + if err != nil { + p.logger.Error(errors.Wrap(err, "deleteMessages failed")) } } - }(messages[i]) + return + case <-time.After(time.Duration(visibilityTimeout/2) * time.Second): + // If half of the set visibilityTimeout passed and this is + // still ongoing, then change visibility timeout. + err := changeVisibilityTimeout(queueURL, visibilityTimeout, svcSQS, message.ReceiptHandle) + if err != nil { + p.logger.Error(errors.Wrap(err, "change message visibility failed")) + } + p.logger.Infof("Message visibility timeout updated to %v", visibilityTimeout) + } } - wg.Wait() } func changeVisibilityTimeout(queueURL string, visibilityTimeout int64, svc *sqs.Client, receiptHandle *string) error { From bc480d2ecbfd2f93d4ec5116b8f4eddd8238c775 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Fri, 19 Jul 2019 10:57:54 -0600 Subject: [PATCH 33/59] separate processMessage into a function --- x-pack/filebeat/input/s3/input.go | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 1835121105a..08ecb07e7a1 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -220,26 +220,26 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim // process messages received from sqs for i := range messages { errC := make(chan error) - // launch goroutine to handle each message from sqs - go func(message sqs.Message) { - defer wg.Done() - defer close(errC) - - s3Infos, err := handleSQSMessage(message) - if err != nil { - p.logger.Error(errors.Wrap(err, "handelMessage failed")) - return - } - - // read from s3 object and create event for each log line - p.handleS3Objects(svcS3, s3Infos, errC) - }(messages[i]) - + go p.processMessage(svcS3, messages[i], &wg, errC) go p.sendKeepAlive(svcSQS, messages[i], queueURL, visibilityTimeout, errC) } wg.Wait() } +func (p *Input) processMessage(svcS3 *s3.Client, message sqs.Message, wg *sync.WaitGroup, errC chan error) { + defer wg.Done() + defer close(errC) + + s3Infos, err := handleSQSMessage(message) + if err != nil { + p.logger.Error(errors.Wrap(err, "handelMessage failed")) + return + } + + // read from s3 object and create event for each log line + p.handleS3Objects(svcS3, s3Infos, errC) +} + func (p *Input) sendKeepAlive(svcSQS *sqs.Client, message sqs.Message, queueURL string, visibilityTimeout int64, errC chan error) { for { select { From 62a7b3027d71f486f85b33b88b1546f6213eace4 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Sat, 20 
Jul 2019 17:27:28 -0600 Subject: [PATCH 34/59] Add bounded message queue --- x-pack/filebeat/input/s3/input.go | 59 +++++++++++++++++++------------ 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 08ecb07e7a1..634b63a6d64 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -10,7 +10,6 @@ import ( "crypto/sha256" "encoding/hex" "encoding/json" - "fmt" "io" "strings" "sync" @@ -41,6 +40,9 @@ var ( // than this value (however, fewer messages might be returned). maxNumberOfMessage int64 = 10 + // The maximum size of message queue for storing SQS messages. + maxMessageQueue int64 = 2 + // The duration (in seconds) for which the call waits for a message to arrive // in the queue before returning. If a message is available, the call returns // sooner than WaitTimeSeconds. If no messages are available and the wait time @@ -57,11 +59,13 @@ func init() { // Input is a input for s3 type Input struct { - outlet channel.Outleter - config config - awsConfig awssdk.Config - logger *logp.Logger - close chan struct{} + mutex sync.Mutex + outlet channel.Outleter + config config + awsConfig awssdk.Config + logger *logp.Logger + close chan struct{} + messageQueue chan sqs.Message } type s3Info struct { @@ -110,11 +114,12 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input } p := &Input{ - outlet: outlet, - config: config, - awsConfig: awsConfig, - logger: logger, - close: make(chan struct{}), + outlet: outlet, + config: config, + awsConfig: awsConfig, + logger: logger, + close: make(chan struct{}), + messageQueue: make(chan sqs.Message, maxMessageQueue), } return p, nil @@ -195,14 +200,18 @@ func (p *Input) Run() { continue } - // process messages received from sqs, get logs from s3 and create events - p.processor(queueURL, output.Messages, visibilityTimeout, svcS3, svcSQS) + for _, message := range output.Messages { + p.messageQueue <- message + // process messages received from sqs, get logs from s3 and create events + go p.processor(queueURL, visibilityTimeout, svcS3, svcSQS) + } } } // Stop stops the s3 input func (p *Input) Stop() { close(p.close) + close(p.messageQueue) defer p.outlet.Close() p.logger.Info("Stopping s3 input") } @@ -212,18 +221,22 @@ func (p *Input) Wait() { p.Stop() } -func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTimeout int64, svcS3 *s3.Client, svcSQS *sqs.Client) { - var wg sync.WaitGroup - numMessages := len(messages) - wg.Add(numMessages) +func (p *Input) processor(queueURL string, visibilityTimeout int64, svcS3 *s3.Client, svcSQS *sqs.Client) { + wg := new(sync.WaitGroup) // process messages received from sqs - for i := range messages { - errC := make(chan error) - go p.processMessage(svcS3, messages[i], &wg, errC) - go p.sendKeepAlive(svcSQS, messages[i], queueURL, visibilityTimeout, errC) + for { + select { + case msg := <-p.messageQueue: + wg.Add(1) + errC := make(chan error) + go p.processMessage(svcS3, msg, wg, errC) + go p.sendKeepAlive(svcSQS, msg, queueURL, visibilityTimeout, errC) + default: + wg.Wait() + return + } } - wg.Wait() } func (p *Input) processMessage(svcS3 *s3.Client, message sqs.Message, wg *sync.WaitGroup, errC chan error) { @@ -427,7 +440,7 @@ func createEvent(log string, offset int, s3Info s3Info, objectHash string) *beat }, } return &beat.Event{ - Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, + // Meta: common.MapStr{"id": objectHash + 
"-" + fmt.Sprintf("%012d", offset)}, Timestamp: time.Now(), Fields: f, } From 1cedb0239d4f3c42a827d3c4c44032734527f8a4 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Sun, 21 Jul 2019 22:20:58 -0600 Subject: [PATCH 35/59] Uncomment meta in event --- x-pack/filebeat/input/s3/input.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 634b63a6d64..981c2b7a433 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -10,6 +10,7 @@ import ( "crypto/sha256" "encoding/hex" "encoding/json" + "fmt" "io" "strings" "sync" @@ -196,11 +197,12 @@ func (p *Input) Run() { } if len(output.Messages) == 0 { - p.logger.Debug("no message received from SQS:", queueURL) + p.logger.Debugf("no message received from SQS %v", queueURL) continue } for _, message := range output.Messages { + // store messages queried from sqs into a buffered channel p.messageQueue <- message // process messages received from sqs, get logs from s3 and create events go p.processor(queueURL, visibilityTimeout, svcS3, svcSQS) @@ -440,7 +442,7 @@ func createEvent(log string, offset int, s3Info s3Info, objectHash string) *beat }, } return &beat.Event{ - // Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, + Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, Timestamp: time.Now(), Fields: f, } From 47d378b5b4896963392d6bf2019baa09f3603ddd Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 22 Jul 2019 10:25:29 -0600 Subject: [PATCH 36/59] Revert "Uncomment meta in event" This reverts commit 742c25a69dff2b434f338f40248d53c8858217d6. --- x-pack/filebeat/input/s3/input.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 981c2b7a433..634b63a6d64 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -10,7 +10,6 @@ import ( "crypto/sha256" "encoding/hex" "encoding/json" - "fmt" "io" "strings" "sync" @@ -197,12 +196,11 @@ func (p *Input) Run() { } if len(output.Messages) == 0 { - p.logger.Debugf("no message received from SQS %v", queueURL) + p.logger.Debug("no message received from SQS:", queueURL) continue } for _, message := range output.Messages { - // store messages queried from sqs into a buffered channel p.messageQueue <- message // process messages received from sqs, get logs from s3 and create events go p.processor(queueURL, visibilityTimeout, svcS3, svcSQS) @@ -442,7 +440,7 @@ func createEvent(log string, offset int, s3Info s3Info, objectHash string) *beat }, } return &beat.Event{ - Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, + // Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, Timestamp: time.Now(), Fields: f, } From dafbea78571bdf07b0d0785cb5bd98e87be34b52 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 22 Jul 2019 10:25:58 -0600 Subject: [PATCH 37/59] Revert "Add bounded message queue" This reverts commit a53de38c7912b2bbdd3eea72b2316e330c560a75. 
--- x-pack/filebeat/input/s3/input.go | 59 ++++++++++++------------------- 1 file changed, 23 insertions(+), 36 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 634b63a6d64..08ecb07e7a1 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -10,6 +10,7 @@ import ( "crypto/sha256" "encoding/hex" "encoding/json" + "fmt" "io" "strings" "sync" @@ -40,9 +41,6 @@ var ( // than this value (however, fewer messages might be returned). maxNumberOfMessage int64 = 10 - // The maximum size of message queue for storing SQS messages. - maxMessageQueue int64 = 2 - // The duration (in seconds) for which the call waits for a message to arrive // in the queue before returning. If a message is available, the call returns // sooner than WaitTimeSeconds. If no messages are available and the wait time @@ -59,13 +57,11 @@ func init() { // Input is a input for s3 type Input struct { - mutex sync.Mutex - outlet channel.Outleter - config config - awsConfig awssdk.Config - logger *logp.Logger - close chan struct{} - messageQueue chan sqs.Message + outlet channel.Outleter + config config + awsConfig awssdk.Config + logger *logp.Logger + close chan struct{} } type s3Info struct { @@ -114,12 +110,11 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input } p := &Input{ - outlet: outlet, - config: config, - awsConfig: awsConfig, - logger: logger, - close: make(chan struct{}), - messageQueue: make(chan sqs.Message, maxMessageQueue), + outlet: outlet, + config: config, + awsConfig: awsConfig, + logger: logger, + close: make(chan struct{}), } return p, nil @@ -200,18 +195,14 @@ func (p *Input) Run() { continue } - for _, message := range output.Messages { - p.messageQueue <- message - // process messages received from sqs, get logs from s3 and create events - go p.processor(queueURL, visibilityTimeout, svcS3, svcSQS) - } + // process messages received from sqs, get logs from s3 and create events + p.processor(queueURL, output.Messages, visibilityTimeout, svcS3, svcSQS) } } // Stop stops the s3 input func (p *Input) Stop() { close(p.close) - close(p.messageQueue) defer p.outlet.Close() p.logger.Info("Stopping s3 input") } @@ -221,22 +212,18 @@ func (p *Input) Wait() { p.Stop() } -func (p *Input) processor(queueURL string, visibilityTimeout int64, svcS3 *s3.Client, svcSQS *sqs.Client) { - wg := new(sync.WaitGroup) +func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTimeout int64, svcS3 *s3.Client, svcSQS *sqs.Client) { + var wg sync.WaitGroup + numMessages := len(messages) + wg.Add(numMessages) // process messages received from sqs - for { - select { - case msg := <-p.messageQueue: - wg.Add(1) - errC := make(chan error) - go p.processMessage(svcS3, msg, wg, errC) - go p.sendKeepAlive(svcSQS, msg, queueURL, visibilityTimeout, errC) - default: - wg.Wait() - return - } + for i := range messages { + errC := make(chan error) + go p.processMessage(svcS3, messages[i], &wg, errC) + go p.sendKeepAlive(svcSQS, messages[i], queueURL, visibilityTimeout, errC) } + wg.Wait() } func (p *Input) processMessage(svcS3 *s3.Client, message sqs.Message, wg *sync.WaitGroup, errC chan error) { @@ -440,7 +427,7 @@ func createEvent(log string, offset int, s3Info s3Info, objectHash string) *beat }, } return &beat.Event{ - // Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, + Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, Timestamp: time.Now(), Fields: f, } 
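The mechanism that PATCH 32 above extracts into sendKeepAlive (and that the next patch renames to processorKeepAlive) is worth spelling out: while one goroutine reads the S3 objects referenced by an SQS message, a companion goroutine keeps the message invisible to other queue consumers by renewing its visibility timeout, and it deletes the message only after processing reports success. The sketch below distills that loop; it is a minimal illustration, not code from these patches, and the queueAPI interface, the keepAlive name, and the ChangeVisibility/DeleteMessage methods are invented stand-ins for the aws-sdk-go-v2 sqs.Client request calls.

----
package sketch

import "time"

// queueAPI is a hypothetical stand-in for the two SQS operations the
// keep-alive loop needs (the real code builds requests on *sqs.Client).
type queueAPI interface {
	ChangeVisibility(receiptHandle string, seconds int64) error
	DeleteMessage(receiptHandle string) error
}

// keepAlive runs beside the goroutine processing one SQS message. errC
// reports the processing result: an error value on failure, or a close
// with no value (yielding a nil receive) on success.
func keepAlive(q queueAPI, receiptHandle string, visibilityTimeout int64, errC <-chan error, done <-chan struct{}) {
	for {
		select {
		case <-done:
			// The input is shutting down; stop renewing the message.
			return
		case err := <-errC:
			if err != nil {
				// Processing failed: give the message one more full window,
				// after which it becomes visible again and is retried.
				_ = q.ChangeVisibility(receiptHandle, visibilityTimeout)
				return
			}
			// Processing succeeded: remove the message from the queue so it
			// is never delivered to another consumer.
			_ = q.DeleteMessage(receiptHandle)
			return
		case <-time.After(time.Duration(visibilityTimeout/2) * time.Second):
			// Still processing: renew the visibility timeout before it lapses.
			_ = q.ChangeVisibility(receiptHandle, visibilityTimeout)
		}
	}
}
----

Renewing at half the configured window, as the patches do, is a deliberate margin: the other half covers the latency of the ChangeMessageVisibility call itself, so the message never becomes visible mid-processing.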
From 566c82a51a570606f2ba299605d341ae36353e3d Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 22 Jul 2019 13:47:05 -0600 Subject: [PATCH 38/59] add workerOnce to only start Run() once --- .../docs/inputs/input-aws-s3.asciidoc | 8 +-- x-pack/filebeat/input/s3/config.go | 2 +- x-pack/filebeat/input/s3/input.go | 61 +++++++++++-------- 3 files changed, 41 insertions(+), 30 deletions(-) diff --git a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc index 379f8ed75b0..b1f9d8d8e40 100644 --- a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc @@ -29,7 +29,7 @@ Example configuration 1: ---- {beatname_lc}.inputs: - type: s3 - queue_urls: ["https://sqs.ap-southeast-1.amazonaws.com/1234/test-s3-queue"] + queue_url: https://sqs.ap-southeast-1.amazonaws.com/1234/test-s3-queue access_key_id: '${AWS_ACCESS_KEY_ID:""}' secret_access_key: '${AWS_SECRET_ACCESS_KEY:""}' session_token: '${AWS_SESSION_TOKEN:""}' @@ -41,7 +41,7 @@ Example configuration 2: ---- {beatname_lc}.inputs: - type: s3 - queue_urls: ["https://sqs.ap-southeast-1.amazonaws.com/1234/test-s3-queue"] + queue_url: https://sqs.ap-southeast-1.amazonaws.com/1234/test-s3-queue credential_profile_name: test-s3-input ---- @@ -49,9 +49,9 @@ The `s3` input supports the following configuration options plus the <<{beatname_lc}-input-{type}-common-options>> described later. [float] -==== `queue_urls` +==== `queue_url` -URLs of the AWS SQS queues that messages will be received from. Required. +URL of the AWS SQS queue that messages will be received from. Required. [float] ==== `visibility_timeout` diff --git a/x-pack/filebeat/input/s3/config.go b/x-pack/filebeat/input/s3/config.go index fe1e60aac2b..2583a074874 100644 --- a/x-pack/filebeat/input/s3/config.go +++ b/x-pack/filebeat/input/s3/config.go @@ -20,7 +20,7 @@ var defaultConfig = config{ type config struct { harvester.ForwarderConfig `config:",inline"` - QueueURLs []string `config:"queue_urls" validate:"nonzero,required"` + QueueURL string `config:"queue_url" validate:"nonzero,required"` AccessKeyID string `config:"access_key_id"` SecretAccessKey string `config:"secret_access_key"` SessionToken string `config:"session_token"` diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 08ecb07e7a1..243b7388d79 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -10,7 +10,6 @@ import ( "crypto/sha256" "encoding/hex" "encoding/json" - "fmt" "io" "strings" "sync" @@ -57,11 +56,12 @@ func init() { // Input is a input for s3 type Input struct { - outlet channel.Outleter - config config - awsConfig awssdk.Config - logger *logp.Logger - close chan struct{} + outlet channel.Outleter // Output of received s3 logs. + config config + awsConfig awssdk.Config + logger *logp.Logger + close chan struct{} + workerOnce sync.Once // Guarantees that the worker goroutine is only started once. 
} type s3Info struct { @@ -125,10 +125,6 @@ func (c *config) Validate() error { return errors.New("visibilityTimeout is not defined within the " + "expected bounds") } - - if len(c.QueueURLs) == 0 { - return errors.New("no SQS queueURLs are configured") - } return nil } @@ -160,14 +156,13 @@ func getAWSCredentials(config config) (awssdk.Config, error) { // Run runs the input func (p *Input) Run() { - p.logger.Debugf("Run s3 input with queueURLs: %v", p.config.QueueURLs) - visibilityTimeout := int64(p.config.VisibilityTimeout.Seconds()) + p.workerOnce.Do(func() { + p.logger.Infof("s3 input worker has started. with queueURL: %v", p.config.QueueURL) - for _, queueURL := range p.config.QueueURLs { - regionName, err := getRegionFromQueueURL(queueURL) + visibilityTimeout := int64(p.config.VisibilityTimeout.Seconds()) + regionName, err := getRegionFromQueueURL(p.config.QueueURL) if err != nil { - p.logger.Errorf("failed to get region name from queueURL: %v", queueURL) - continue + p.logger.Errorf("failed to get region name from queueURL: %v", p.config.QueueURL) } awsConfig := p.awsConfig.Copy() @@ -175,35 +170,51 @@ func (p *Input) Run() { svcSQS := sqs.New(awsConfig) svcS3 := s3.New(awsConfig) + go p.run(svcSQS, svcS3, visibilityTimeout) + go p.runKeepAlive() + }) +} + +func (p *Input) run(svcSQS *sqs.Client, svcS3 *s3.Client, visibilityTimeout int64) { + for { // receive messages from sqs req := svcSQS.ReceiveMessageRequest( &sqs.ReceiveMessageInput{ - QueueUrl: &queueURL, + QueueUrl: &p.config.QueueURL, MessageAttributeNames: []string{"All"}, MaxNumberOfMessages: &maxNumberOfMessage, VisibilityTimeout: &visibilityTimeout, WaitTimeSeconds: &waitTimeSecond, }) + output, err := req.Send(context.Background()) if err != nil { p.logger.Error("failed to receive message from SQS:", err) - continue + close(p.close) } - if len(output.Messages) == 0 { - p.logger.Debug("no message received from SQS:", queueURL) + if output == nil || len(output.Messages) == 0 { + p.logger.Debug("no message received from SQS:", p.config.QueueURL) continue } // process messages received from sqs, get logs from s3 and create events - p.processor(queueURL, output.Messages, visibilityTimeout, svcS3, svcSQS) + p.processor(p.config.QueueURL, output.Messages, visibilityTimeout, svcS3, svcSQS) + } +} + +func (p *Input) runKeepAlive() { + select { + case <-p.close: + return + default: } } // Stop stops the s3 input func (p *Input) Stop() { - close(p.close) defer p.outlet.Close() + close(p.close) p.logger.Info("Stopping s3 input") } @@ -221,7 +232,7 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim for i := range messages { errC := make(chan error) go p.processMessage(svcS3, messages[i], &wg, errC) - go p.sendKeepAlive(svcSQS, messages[i], queueURL, visibilityTimeout, errC) + go p.processorKeepAlive(svcSQS, messages[i], queueURL, visibilityTimeout, errC) } wg.Wait() } @@ -240,7 +251,7 @@ func (p *Input) processMessage(svcS3 *s3.Client, message sqs.Message, wg *sync.W p.handleS3Objects(svcS3, s3Infos, errC) } -func (p *Input) sendKeepAlive(svcSQS *sqs.Client, message sqs.Message, queueURL string, visibilityTimeout int64, errC chan error) { +func (p *Input) processorKeepAlive(svcSQS *sqs.Client, message sqs.Message, queueURL string, visibilityTimeout int64, errC chan error) { for { select { case <-p.close: @@ -427,7 +438,7 @@ func createEvent(log string, offset int, s3Info s3Info, objectHash string) *beat }, } return &beat.Event{ - Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", 
offset)}, + // Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, Timestamp: time.Now(), Fields: f, } From 1c46b0351f263ff92796c00d3311830777e1c7de Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 22 Jul 2019 13:54:43 -0600 Subject: [PATCH 39/59] Rebase to use x-pack libbeat common GetAWSCredentials --- x-pack/filebeat/input/s3/config.go | 7 ++----- x-pack/filebeat/input/s3/input.go | 31 ++---------------------------- 2 files changed, 4 insertions(+), 34 deletions(-) diff --git a/x-pack/filebeat/input/s3/config.go b/x-pack/filebeat/input/s3/config.go index 2583a074874..aa4e72ef8a5 100644 --- a/x-pack/filebeat/input/s3/config.go +++ b/x-pack/filebeat/input/s3/config.go @@ -8,22 +8,19 @@ import ( "time" "github.com/elastic/beats/filebeat/harvester" + awscommon "github.com/elastic/beats/x-pack/libbeat/common/aws" ) var defaultConfig = config{ ForwarderConfig: harvester.ForwarderConfig{ Type: "s3", }, - ProfileName: "default", VisibilityTimeout: 300 * time.Second, } type config struct { harvester.ForwarderConfig `config:",inline"` QueueURL string `config:"queue_url" validate:"nonzero,required"` - AccessKeyID string `config:"access_key_id"` - SecretAccessKey string `config:"secret_access_key"` - SessionToken string `config:"session_token"` - ProfileName string `config:"credential_profile_name"` VisibilityTimeout time.Duration `config:"visibility_timeout"` + AwsConfig awscommon.ConfigAWS } diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 243b7388d79..3e97a19287e 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -16,8 +16,6 @@ import ( "time" awssdk "github.com/aws/aws-sdk-go-v2/aws" - "github.com/aws/aws-sdk-go-v2/aws/defaults" - "github.com/aws/aws-sdk-go-v2/aws/external" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/s3iface" "github.com/aws/aws-sdk-go-v2/service/sqs" @@ -31,6 +29,7 @@ import ( "github.com/elastic/beats/libbeat/common" "github.com/elastic/beats/libbeat/common/cfgwarn" "github.com/elastic/beats/libbeat/logp" + awscommon "github.com/elastic/beats/x-pack/libbeat/common/aws" ) const inputName = "s3" @@ -104,7 +103,7 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input return nil, err } - awsConfig, err := getAWSCredentials(config) + awsConfig, err := awscommon.GetAWSCredentials(config.AwsConfig) if err != nil { return nil, errors.Wrap(err, "getAWSCredentials failed") } @@ -128,32 +127,6 @@ func (c *config) Validate() error { return nil } -func getAWSCredentials(config config) (awssdk.Config, error) { - // Check if accessKeyID and secretAccessKey is given from configuration - if config.AccessKeyID != "" && config.SecretAccessKey != "" { - awsConfig := defaults.Config() - awsCredentials := awssdk.Credentials{ - AccessKeyID: config.AccessKeyID, - SecretAccessKey: config.SecretAccessKey, - } - if config.SessionToken != "" { - awsCredentials.SessionToken = config.SessionToken - } - - awsConfig.Credentials = awssdk.StaticCredentialsProvider{ - Value: awsCredentials, - } - return awsConfig, nil - } - - // If accessKeyID and secretAccessKey is not given, then load from default config - // Please see https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-profiles.html - // with more details. 
- return external.LoadDefaultAWSConfig( - external.WithSharedConfigProfile(config.ProfileName), - ) -} - // Run runs the input func (p *Input) Run() { p.workerOnce.Do(func() { From 5a8123528fc3c1921bcd12fdad061dc4db9b4f64 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 22 Jul 2019 17:39:18 -0600 Subject: [PATCH 40/59] Change defaultConfig to be a function --- x-pack/filebeat/input/s3/config.go | 16 +++++++++------- x-pack/filebeat/input/s3/input.go | 14 ++++++++++---- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/x-pack/filebeat/input/s3/config.go b/x-pack/filebeat/input/s3/config.go index aa4e72ef8a5..398d74fa6d8 100644 --- a/x-pack/filebeat/input/s3/config.go +++ b/x-pack/filebeat/input/s3/config.go @@ -11,16 +11,18 @@ import ( awscommon "github.com/elastic/beats/x-pack/libbeat/common/aws" ) -var defaultConfig = config{ - ForwarderConfig: harvester.ForwarderConfig{ - Type: "s3", - }, - VisibilityTimeout: 300 * time.Second, -} - type config struct { harvester.ForwarderConfig `config:",inline"` QueueURL string `config:"queue_url" validate:"nonzero,required"` VisibilityTimeout time.Duration `config:"visibility_timeout"` AwsConfig awscommon.ConfigAWS } + +func defaultConfig() config { + return config{ + ForwarderConfig: harvester.ForwarderConfig{ + Type: "s3", + }, + VisibilityTimeout: 300 * time.Second, + } +} diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 3e97a19287e..b85a94756ce 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -10,6 +10,7 @@ import ( "crypto/sha256" "encoding/hex" "encoding/json" + "fmt" "io" "strings" "sync" @@ -90,14 +91,20 @@ type sqsMessage struct { // NewInput creates a new s3 input func NewInput(cfg *common.Config, outletFactory channel.Connector, context input.Context) (input.Input, error) { cfgwarn.Beta("s3 input type is used") - logger := logp.NewLogger(inputName) - config := defaultConfig + config := defaultConfig() if err := cfg.Unpack(&config); err != nil { return nil, errors.Wrap(err, "failed unpacking config") } + var awsCred awscommon.ConfigAWS + if err := cfg.Unpack(&awsCred); err != nil { + return nil, errors.Wrap(err, "failed unpacking aws config") + } + + config.AwsConfig = awsCred + outlet, err := outletFactory(cfg, context.DynamicFields) if err != nil { return nil, err @@ -115,7 +122,6 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input logger: logger, close: make(chan struct{}), } - return p, nil } @@ -411,7 +417,7 @@ func createEvent(log string, offset int, s3Info s3Info, objectHash string) *beat }, } return &beat.Event{ - // Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, + Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, Timestamp: time.Now(), Fields: f, } From 6b06d0fa19723614c1486235b710b2b4b627cadf Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Tue, 23 Jul 2019 07:43:22 -0600 Subject: [PATCH 41/59] Move Validate into config.go --- x-pack/filebeat/input/s3/config.go | 15 +++++++-- x-pack/filebeat/input/s3/input.go | 52 +++++++++++++----------------- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/x-pack/filebeat/input/s3/config.go b/x-pack/filebeat/input/s3/config.go index 398d74fa6d8..7b79022371d 100644 --- a/x-pack/filebeat/input/s3/config.go +++ b/x-pack/filebeat/input/s3/config.go @@ -5,6 +5,7 @@ package s3 import ( + "fmt" "time" "github.com/elastic/beats/filebeat/harvester" @@ -13,9 +14,9 @@ import ( type config struct { 
harvester.ForwarderConfig `config:",inline"` - QueueURL string `config:"queue_url" validate:"nonzero,required"` - VisibilityTimeout time.Duration `config:"visibility_timeout"` - AwsConfig awscommon.ConfigAWS + QueueURL string `config:"queue_url" validate:"nonzero,required"` + VisibilityTimeout time.Duration `config:"visibility_timeout"` + AwsConfig awscommon.ConfigAWS `config:",inline"` } func defaultConfig() config { @@ -26,3 +27,11 @@ func defaultConfig() config { VisibilityTimeout: 300 * time.Second, } } + +func (c *config) Validate() error { + if c.VisibilityTimeout < 0 || c.VisibilityTimeout.Hours() > 12 { + return fmt.Errorf("visibility timeout %v is not within the "+ + "required range 0s to 12h", c.VisibilityTimeout) + } + return nil +} diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index b85a94756ce..43622517ad5 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -71,20 +71,26 @@ type s3Info struct { arn string } +type bucket struct { + Name string `json:"name"` + Arn string `json:"arn"` +} + +type object struct { + Key string `json:"key"` +} + +type s3BucketObject struct { + bucket `json:"bucket"` + object `json:"object"` +} + type sqsMessage struct { Records []struct { - EventSource string `json:"eventSource"` - AwsRegion string `json:"awsRegion"` - EventName string `json:"eventName"` - S3 struct { - Bucket struct { - Name string `json:"name"` - Arn string `json:"arn"` - } `json:"bucket"` - Object struct { - Key string `json:"key"` - } `json:"object"` - } `json:"s3"` + EventSource string `json:"eventSource"` + AwsRegion string `json:"awsRegion"` + EventName string `json:"eventName"` + S3 s3BucketObject `json:"s3"` } `json:"Records"` } @@ -98,13 +104,6 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input return nil, errors.Wrap(err, "failed unpacking config") } - var awsCred awscommon.ConfigAWS - if err := cfg.Unpack(&awsCred); err != nil { - return nil, errors.Wrap(err, "failed unpacking aws config") - } - - config.AwsConfig = awsCred - outlet, err := outletFactory(cfg, context.DynamicFields) if err != nil { return nil, err @@ -125,17 +124,10 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input return p, nil } -func (c *config) Validate() error { - if c.VisibilityTimeout < 0 || c.VisibilityTimeout.Hours() > 12 { - return errors.New("visibilityTimeout is not defined within the " + - "expected bounds") - } - return nil -} - // Run runs the input func (p *Input) Run() { p.workerOnce.Do(func() { + defer p.logger.Infof("S3 input worker for '%v' has stopped.", p.config.QueueURL) p.logger.Infof("s3 input worker has started. 
with queueURL: %v", p.config.QueueURL) visibilityTimeout := int64(p.config.VisibilityTimeout.Seconds()) @@ -295,9 +287,9 @@ func handleSQSMessage(m sqs.Message) ([]s3Info, error) { if record.EventSource == "aws:s3" && record.EventName == "ObjectCreated:Put" { s3Infos = append(s3Infos, s3Info{ region: record.AwsRegion, - name: record.S3.Bucket.Name, - key: record.S3.Object.Key, - arn: record.S3.Bucket.Arn, + name: record.S3.bucket.Name, + key: record.S3.object.Key, + arn: record.S3.bucket.Arn, }) } } From efa648f38e6eb04aeaca2c7b43d77299d439dc60 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Tue, 23 Jul 2019 09:50:27 -0600 Subject: [PATCH 42/59] Add channelContext --- x-pack/filebeat/input/s3/input.go | 66 +++++++++++++++++++------------ 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 43622517ad5..143f70b8e72 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -62,6 +62,7 @@ type Input struct { logger *logp.Logger close chan struct{} workerOnce sync.Once // Guarantees that the worker goroutine is only started once. + context *channelContext } type s3Info struct { @@ -94,6 +95,23 @@ type sqsMessage struct { } `json:"Records"` } +// channelContext implements context.Context by wrapping a channel +type channelContext struct { + done <-chan struct{} +} + +func (r *channelContext) Deadline() (time.Time, bool) { return time.Time{}, false } +func (r *channelContext) Done() <-chan struct{} { return r.done } +func (r *channelContext) Err() error { + select { + case <-r.done: + return context.Canceled + default: + return nil + } +} +func (r *channelContext) Value(key interface{}) interface{} { return nil } + // NewInput creates a new s3 input func NewInput(cfg *common.Config, outletFactory channel.Connector, context input.Context) (input.Input, error) { cfgwarn.Beta("s3 input type is used") @@ -114,12 +132,14 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input return nil, errors.Wrap(err, "getAWSCredentials failed") } + closeChannel := make(chan struct{}) p := &Input{ outlet: outlet, config: config, awsConfig: awsConfig, logger: logger, - close: make(chan struct{}), + close: closeChannel, + context: &channelContext{closeChannel}, } return p, nil } @@ -127,9 +147,6 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input // Run runs the input func (p *Input) Run() { p.workerOnce.Do(func() { - defer p.logger.Infof("S3 input worker for '%v' has stopped.", p.config.QueueURL) - p.logger.Infof("s3 input worker has started. with queueURL: %v", p.config.QueueURL) - visibilityTimeout := int64(p.config.VisibilityTimeout.Seconds()) regionName, err := getRegionFromQueueURL(p.config.QueueURL) if err != nil { @@ -142,12 +159,18 @@ func (p *Input) Run() { svcS3 := s3.New(awsConfig) go p.run(svcSQS, svcS3, visibilityTimeout) - go p.runKeepAlive() }) } func (p *Input) run(svcSQS *sqs.Client, svcS3 *s3.Client, visibilityTimeout int64) { + defer p.logger.Infof("S3 input worker for '%v' has stopped.", p.config.QueueURL) + p.logger.Infof("s3 input worker has started. 
with queueURL: %v", p.config.QueueURL) for { + select { + case <-p.close: + return + default: + } // receive messages from sqs req := svcSQS.ReceiveMessageRequest( &sqs.ReceiveMessageInput{ @@ -158,10 +181,11 @@ func (p *Input) run(svcSQS *sqs.Client, svcS3 *s3.Client, visibilityTimeout int6 WaitTimeSeconds: &waitTimeSecond, }) - output, err := req.Send(context.Background()) + output, err := req.Send(p.context) if err != nil { p.logger.Error("failed to receive message from SQS:", err) - close(p.close) + time.Sleep(time.Duration(waitTimeSecond) * time.Second) + continue } if output == nil || len(output.Messages) == 0 { @@ -174,14 +198,6 @@ func (p *Input) run(svcSQS *sqs.Client, svcS3 *s3.Client, visibilityTimeout int6 } } -func (p *Input) runKeepAlive() { - select { - case <-p.close: - return - default: - } -} - // Stop stops the s3 input func (p *Input) Stop() { defer p.outlet.Close() @@ -230,13 +246,13 @@ func (p *Input) processorKeepAlive(svcSQS *sqs.Client, message sqs.Message, queu case err := <-errC: if err != nil { p.logger.Warnf("Processing message failed: %v", err) - err := changeVisibilityTimeout(queueURL, visibilityTimeout, svcSQS, message.ReceiptHandle) + err := p.changeVisibilityTimeout(queueURL, visibilityTimeout, svcSQS, message.ReceiptHandle) if err != nil { p.logger.Error(errors.Wrap(err, "change message visibility failed")) } p.logger.Warnf("Message visibility timeout updated to %v", visibilityTimeout) } else { - err := deleteMessage(queueURL, *message.ReceiptHandle, svcSQS) + err := p.deleteMessage(queueURL, *message.ReceiptHandle, svcSQS) if err != nil { p.logger.Error(errors.Wrap(err, "deleteMessages failed")) } @@ -245,7 +261,7 @@ func (p *Input) processorKeepAlive(svcSQS *sqs.Client, message sqs.Message, queu case <-time.After(time.Duration(visibilityTimeout/2) * time.Second): // If half of the set visibilityTimeout passed and this is // still ongoing, then change visibility timeout. 
- err := changeVisibilityTimeout(queueURL, visibilityTimeout, svcSQS, message.ReceiptHandle) + err := p.changeVisibilityTimeout(queueURL, visibilityTimeout, svcSQS, message.ReceiptHandle) if err != nil { p.logger.Error(errors.Wrap(err, "change message visibility failed")) } @@ -254,13 +270,13 @@ func (p *Input) processorKeepAlive(svcSQS *sqs.Client, message sqs.Message, queu } } -func changeVisibilityTimeout(queueURL string, visibilityTimeout int64, svc *sqs.Client, receiptHandle *string) error { +func (p *Input) changeVisibilityTimeout(queueURL string, visibilityTimeout int64, svc *sqs.Client, receiptHandle *string) error { req := svc.ChangeMessageVisibilityRequest(&sqs.ChangeMessageVisibilityInput{ QueueUrl: &queueURL, VisibilityTimeout: &visibilityTimeout, ReceiptHandle: receiptHandle, }) - _, err := req.Send(context.Background()) + _, err := req.Send(p.context) return err } @@ -307,7 +323,7 @@ func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info, errC ch objectHash := s3ObjectHash(s3Info) // read from s3 object - reader, err := bufferedIORead(svc, s3Info) + reader, err := p.bufferedIORead(svc, s3Info) if err != nil { errC <- errors.Wrap(err, "bufferedIORead failed") return @@ -349,14 +365,14 @@ func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info, errC ch } } -func bufferedIORead(svc s3iface.ClientAPI, s3Info s3Info) (*bufio.Reader, error) { +func (p *Input) bufferedIORead(svc s3iface.ClientAPI, s3Info s3Info) (*bufio.Reader, error) { s3GetObjectInput := &s3.GetObjectInput{ Bucket: awssdk.String(s3Info.name), Key: awssdk.String(s3Info.key), } req := svc.GetObjectRequest(s3GetObjectInput) - resp, err := req.Send(context.Background()) + resp, err := req.Send(p.context) if err != nil { return nil, errors.Wrapf(err, "s3 get object request failed %v", s3Info.key) } @@ -374,14 +390,14 @@ func (p *Input) forwardEvent(event *beat.Event) error { return nil } -func deleteMessage(queueURL string, messagesReceiptHandle string, svcSQS *sqs.Client) error { +func (p *Input) deleteMessage(queueURL string, messagesReceiptHandle string, svcSQS *sqs.Client) error { deleteMessageInput := &sqs.DeleteMessageInput{ QueueUrl: awssdk.String(queueURL), ReceiptHandle: awssdk.String(messagesReceiptHandle), } req := svcSQS.DeleteMessageRequest(deleteMessageInput) - _, err := req.Send(context.Background()) + _, err := req.Send(p.context) if err != nil { return errors.Wrap(err, "DeleteMessageRequest failed") } From a271dc13f14376a86919c75c43a2f501328c5bbf Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Tue, 23 Jul 2019 10:15:52 -0600 Subject: [PATCH 43/59] Add s3Context --- x-pack/filebeat/input/s3/input.go | 46 +++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 143f70b8e72..51e4f4b8fa6 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -86,6 +86,13 @@ type s3BucketOjbect struct { object `json:"object"` } +type s3Context struct { + mux sync.Mutex + refs int + err error // first error witnessed or multi error + errC chan error +} + type sqsMessage struct { Records []struct { EventSource string `json:"eventSource"` @@ -226,7 +233,6 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim func (p *Input) processMessage(svcS3 *s3.Client, message sqs.Message, wg *sync.WaitGroup, errC chan error) { defer wg.Done() - defer close(errC) s3Infos, err := handleSQSMessage(message) if err != nil { @@ -312,20 
+318,50 @@ func handleSQSMessage(m sqs.Message) ([]s3Info, error) { return s3Infos, nil } +func (c *s3Context) Done() { + c.mux.Lock() + defer c.mux.Unlock() + c.done() +} + +func (c *s3Context) Fail(err error) { + c.mux.Lock() + defer c.mux.Unlock() + + // only care about the last error for now + // TODO: add "Typed" error to error for context + c.err = err + c.done() +} + +func (c *s3Context) done() { + c.refs-- + if c.refs == 0 { + c.errC <- c.err + close(c.errC) + } +} + func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info, errC chan error) { if len(s3Infos) > 0 { var wg sync.WaitGroup wg.Add(len(s3Infos)) + s3Context := &s3Context{ + refs: len(s3Infos), + errC: errC, + } + for i := range s3Infos { go func(s3Info s3Info) { defer wg.Done() + defer s3Context.Done() objectHash := s3ObjectHash(s3Info) // read from s3 object reader, err := p.bufferedIORead(svc, s3Info) if err != nil { - errC <- errors.Wrap(err, "bufferedIORead failed") + s3Context.Fail(errors.Wrap(err, "bufferedIORead failed")) return } @@ -342,12 +378,12 @@ func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info, errC ch offset += len([]byte(log)) err = p.forwardEvent(createEvent(log, offset, s3Info, objectHash)) if err != nil { - errC <- errors.Wrapf(err, "forwardEvent failed for %v", s3Info.key) + s3Context.Fail(errors.Wrapf(err, "forwardEvent failed for %v", s3Info.key)) } return } - errC <- errors.Wrapf(err, "ReadString failed for %v", s3Info.key) + s3Context.Fail(errors.Wrapf(err, "ReadString failed for %v", s3Info.key)) return } @@ -355,7 +391,7 @@ func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info, errC ch offset += len([]byte(log)) err = p.forwardEvent(createEvent(log, offset, s3Info, objectHash)) if err != nil { - errC <- errors.Wrapf(err, "forwardEvent failed for %v", s3Info.key) + s3Context.Fail(errors.Wrapf(err, "forwardEvent failed for %v", s3Info.key)) return } } From da21e065aaf6a0c5860b19c1b9424a18b3a55ed3 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Tue, 23 Jul 2019 10:39:13 -0600 Subject: [PATCH 44/59] Remove parallel for handleS3Objects --- x-pack/filebeat/input/s3/input.go | 116 ++++++++++-------------------- 1 file changed, 36 insertions(+), 80 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 51e4f4b8fa6..033311b0afe 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -10,7 +10,6 @@ import ( "crypto/sha256" "encoding/hex" "encoding/json" - "fmt" "io" "strings" "sync" @@ -86,13 +85,6 @@ type s3BucketOjbect struct { object `json:"object"` } -type s3Context struct { - mux sync.Mutex - refs int - err error // first error witnessed or multi error - errC chan error -} - type sqsMessage struct { Records []struct { EventSource string `json:"eventSource"` @@ -233,6 +225,7 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim func (p *Input) processMessage(svcS3 *s3.Client, message sqs.Message, wg *sync.WaitGroup, errC chan error) { defer wg.Done() + defer close(errC) s3Infos, err := handleSQSMessage(message) if err != nil { @@ -241,7 +234,11 @@ func (p *Input) processMessage(svcS3 *s3.Client, message sqs.Message, wg *sync.W } // read from s3 object and create event for each log line - p.handleS3Objects(svcS3, s3Infos, errC) + err = p.handleS3Objects(svcS3, s3Infos) + if err != nil { + errC <- errors.Wrap(err, "handleS3Objects failed") + p.logger.Error(errors.Wrap(err, "handleS3Objects failed")) + } } func (p *Input) 
processorKeepAlive(svcSQS *sqs.Client, message sqs.Message, queueURL string, visibilityTimeout int64, errC chan error) { @@ -258,6 +255,7 @@ func (p *Input) processorKeepAlive(svcSQS *sqs.Client, message sqs.Message, queu } p.logger.Warnf("Message visibility timeout updated to %v", visibilityTimeout) } else { + // only delete sqs message when errC is closed with no error err := p.deleteMessage(queueURL, *message.ReceiptHandle, svcSQS) if err != nil { p.logger.Error(errors.Wrap(err, "deleteMessages failed")) @@ -318,87 +316,45 @@ func handleSQSMessage(m sqs.Message) ([]s3Info, error) { return s3Infos, nil } -func (c *s3Context) Done() { - c.mux.Lock() - defer c.mux.Unlock() - c.done() -} - -func (c *s3Context) Fail(err error) { - c.mux.Lock() - defer c.mux.Unlock() - - // only care about the last error for now - // TODO: add "Typed" error to error for context - c.err = err - c.done() -} - -func (c *s3Context) done() { - c.refs-- - if c.refs == 0 { - c.errC <- c.err - close(c.errC) - } -} +func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info) error { + for _, s3Info := range s3Infos { + objectHash := s3ObjectHash(s3Info) -func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info, errC chan error) { - if len(s3Infos) > 0 { - var wg sync.WaitGroup - wg.Add(len(s3Infos)) - - s3Context := &s3Context{ - refs: len(s3Infos), - errC: errC, + // read from s3 object + reader, err := p.bufferedIORead(svc, s3Info) + if err != nil { + return errors.Wrap(err, "bufferedIORead failed") } - for i := range s3Infos { - go func(s3Info s3Info) { - defer wg.Done() - defer s3Context.Done() - objectHash := s3ObjectHash(s3Info) - - // read from s3 object - reader, err := p.bufferedIORead(svc, s3Info) - if err != nil { - s3Context.Fail(errors.Wrap(err, "bufferedIORead failed")) - return - } - - offset := 0 - for { - log, err := reader.ReadString('\n') - if log == "" { - break - } - - if err != nil { - if err == io.EOF { - // create event for last line - offset += len([]byte(log)) - err = p.forwardEvent(createEvent(log, offset, s3Info, objectHash)) - if err != nil { - s3Context.Fail(errors.Wrapf(err, "forwardEvent failed for %v", s3Info.key)) - } - return - } - - s3Context.Fail(errors.Wrapf(err, "ReadString failed for %v", s3Info.key)) - return - } + offset := 0 + for { + log, err := reader.ReadString('\n') + if log == "" { + break + } - // create event per log line + if err != nil { + if err == io.EOF { + // create event for last line offset += len([]byte(log)) err = p.forwardEvent(createEvent(log, offset, s3Info, objectHash)) if err != nil { - s3Context.Fail(errors.Wrapf(err, "forwardEvent failed for %v", s3Info.key)) - return + return errors.Wrapf(err, "forwardEvent failed for %v", s3Info.key) } + return nil } - }(s3Infos[i]) + return errors.Wrapf(err, "ReadString failed for %v", s3Info.key) + } + + // create event per log line + offset += len([]byte(log)) + err = p.forwardEvent(createEvent(log, offset, s3Info, objectHash)) + if err != nil { + return errors.Wrapf(err, "forwardEvent failed for %v", s3Info.key) + } } - wg.Wait() } + return nil } func (p *Input) bufferedIORead(svc s3iface.ClientAPI, s3Info s3Info) (*bufio.Reader, error) { @@ -461,7 +417,7 @@ func createEvent(log string, offset int, s3Info s3Info, objectHash string) *beat }, } return &beat.Event{ - Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, + // Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, Timestamp: time.Now(), Fields: f, } From 
d733b3ac35f940fceaa342f2d3a4f27f5c847d8e Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Tue, 23 Jul 2019 10:42:14 -0600 Subject: [PATCH 45/59] Uncomment Meta in event --- x-pack/filebeat/input/s3/input.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 033311b0afe..001a517ef59 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -10,6 +10,7 @@ import ( "crypto/sha256" "encoding/hex" "encoding/json" + "fmt" "io" "strings" "sync" @@ -417,7 +418,7 @@ func createEvent(log string, offset int, s3Info s3Info, objectHash string) *beat }, } return &beat.Event{ - // Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, + Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, Timestamp: time.Now(), Fields: f, } From 8943d13210a90369faaf2956055e361275265a11 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Tue, 23 Jul 2019 11:15:34 -0600 Subject: [PATCH 46/59] fix unit test --- x-pack/filebeat/input/s3/input_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index 4927965b5f7..9b317281ab1 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -35,6 +35,7 @@ var ( key: "log2019-06-21-16-16-54", region: "us-west-1", } + p = Input{} ) func (m *MockS3Client) GetObjectRequest(input *s3.GetObjectInput) s3.GetObjectRequest { @@ -105,7 +106,7 @@ func TestHandleMessage(t *testing.T) { } func TestBufferedIORead(t *testing.T) { - reader, err := bufferedIORead(mockSvc, info) + reader, err := p.bufferedIORead(mockSvc, info) assert.NoError(t, err) for i := 0; i < 3; i++ { switch i { @@ -135,7 +136,7 @@ func TestCreateEvent(t *testing.T) { } s3ObjectHash := s3ObjectHash(s3Info) - reader, err := bufferedIORead(mockSvc, s3Info) + reader, err := p.bufferedIORead(mockSvc, s3Info) assert.NoError(t, err) var events []*beat.Event for { From bce8452cac6665949a7e77ace9e55c7030c7ff9a Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Tue, 23 Jul 2019 13:08:53 -0600 Subject: [PATCH 47/59] Replace select case with p.context.Err() == nil --- x-pack/filebeat/input/s3/input.go | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 001a517ef59..60199c16110 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -100,17 +100,17 @@ type channelContext struct { done <-chan struct{} } -func (r *channelContext) Deadline() (time.Time, bool) { return time.Time{}, false } -func (r *channelContext) Done() <-chan struct{} { return r.done } -func (r *channelContext) Err() error { +func (c *channelContext) Deadline() (time.Time, bool) { return time.Time{}, false } +func (c *channelContext) Done() <-chan struct{} { return c.done } +func (c *channelContext) Err() error { select { - case <-r.done: + case <-c.done: return context.Canceled default: return nil } } -func (r *channelContext) Value(key interface{}) interface{} { return nil } +func (c *channelContext) Value(key interface{}) interface{} { return nil } // NewInput creates a new s3 input func NewInput(cfg *common.Config, outletFactory channel.Connector, context input.Context) (input.Input, error) { @@ -165,12 +165,7 @@ func (p *Input) Run() { func (p *Input) run(svcSQS *sqs.Client, svcS3 *s3.Client, visibilityTimeout int64) { 
defer p.logger.Infof("S3 input worker for '%v' has stopped.", p.config.QueueURL) p.logger.Infof("s3 input worker has started. with queueURL: %v", p.config.QueueURL) - for { - select { - case <-p.close: - return - default: - } + for p.context.Err() == nil { // receive messages from sqs req := svcSQS.ReceiveMessageRequest( &sqs.ReceiveMessageInput{ From c91674551a3f2339a0bda629d6fab6211c5b8754 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 29 Jul 2019 09:03:38 -0600 Subject: [PATCH 48/59] rebase and update to use new connector --- CHANGELOG.next.asciidoc | 1 - x-pack/filebeat/input/s3/input.go | 26 +++++++++++++------------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 47fd6ed0595..ec96d247ce1 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -250,7 +250,6 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Parse more fields from Elasticsearch slowlogs. {pull}11939[11939] - Update module pipelines to enrich events with autonomous system fields. {pull}13036[13036] - Add module for ingesting IBM MQ logs. {pull}8782[8782] -- Add S3 input to retrieve logs from AWS S3 buckets. - Add S3 input to retrieve logs from AWS S3 buckets. {pull}12640[12640] {issue}12582[12582] *Heartbeat* diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 60199c16110..6696ce4c612 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -23,9 +23,7 @@ import ( "github.com/pkg/errors" "github.com/elastic/beats/filebeat/channel" - "github.com/elastic/beats/filebeat/harvester" "github.com/elastic/beats/filebeat/input" - "github.com/elastic/beats/filebeat/util" "github.com/elastic/beats/libbeat/beat" "github.com/elastic/beats/libbeat/common" "github.com/elastic/beats/libbeat/common/cfgwarn" @@ -113,7 +111,7 @@ func (c *channelContext) Err() error { func (c *channelContext) Value(key interface{}) interface{} { return nil } // NewInput creates a new s3 input -func NewInput(cfg *common.Config, outletFactory channel.Connector, context input.Context) (input.Input, error) { +func NewInput(cfg *common.Config, connector channel.Connector, context input.Context) (input.Input, error) { cfgwarn.Beta("s3 input type is used") logger := logp.NewLogger(inputName) @@ -122,7 +120,11 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input return nil, errors.Wrap(err, "failed unpacking config") } - outlet, err := outletFactory(cfg, context.DynamicFields) + out, err := connector.ConnectWith(cfg, beat.ClientConfig{ + Processing: beat.ProcessingConfig{ + DynamicFields: context.DynamicFields, + }, + }) if err != nil { return nil, err } @@ -134,7 +136,7 @@ func NewInput(cfg *common.Config, outletFactory channel.Connector, context input closeChannel := make(chan struct{}) p := &Input{ - outlet: outlet, + outlet: out, config: config, awsConfig: awsConfig, logger: logger, @@ -368,12 +370,10 @@ func (p *Input) bufferedIORead(svc s3iface.ClientAPI, s3Info s3Info) (*bufio.Rea return bufio.NewReader(resp.Body), nil } -func (p *Input) forwardEvent(event *beat.Event) error { - forwarder := harvester.NewForwarder(p.outlet) - d := &util.Data{Event: *event} - err := forwarder.Send(d) - if err != nil { - return errors.Wrap(err, "forwarder send failed") +func (p *Input) forwardEvent(event beat.Event) error { + ok := p.outlet.OnEvent(event) + if !ok { + return errors.New("input outlet closed") } return nil } @@ -392,7 +392,7 @@ func 
(p *Input) deleteMessage(queueURL string, messagesReceiptHandle string, svc return nil } -func createEvent(log string, offset int, s3Info s3Info, objectHash string) *beat.Event { +func createEvent(log string, offset int, s3Info s3Info, objectHash string) beat.Event { f := common.MapStr{ "message": log, "log": common.MapStr{ @@ -412,7 +412,7 @@ func createEvent(log string, offset int, s3Info s3Info, objectHash string) *beat "region": s3Info.region, }, } - return &beat.Event{ + return beat.Event{ Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, Timestamp: time.Now(), Fields: f, From c3a26ae503d48b1b74bb130ab490f7ccd1cb4cf8 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 29 Jul 2019 11:27:43 -0600 Subject: [PATCH 49/59] Add end to end ACK in connector --- x-pack/filebeat/input/s3/input.go | 76 ++++++++++++++++++++++---- x-pack/filebeat/input/s3/input_test.go | 14 ++++- 2 files changed, 75 insertions(+), 15 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 6696ce4c612..bd7b9572e2b 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -93,6 +93,13 @@ type sqsMessage struct { } `json:"Records"` } +type s3Context struct { + mux sync.Mutex + refs int + err error // first error witnessed or multi error + errC chan error +} + // channelContext implements context.Context by wrapping a channel type channelContext struct { done <-chan struct{} @@ -124,6 +131,13 @@ func NewInput(cfg *common.Config, connector channel.Connector, context input.Con Processing: beat.ProcessingConfig{ DynamicFields: context.DynamicFields, }, + ACKEvents: func(privates []interface{}) { + for _, private := range privates { + if s3Context, ok := private.(*s3Context); ok { + s3Context.done() + } + } + }, }) if err != nil { return nil, err @@ -223,19 +237,19 @@ func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTim func (p *Input) processMessage(svcS3 *s3.Client, message sqs.Message, wg *sync.WaitGroup, errC chan error) { defer wg.Done() - defer close(errC) s3Infos, err := handleSQSMessage(message) if err != nil { - p.logger.Error(errors.Wrap(err, "handelMessage failed")) + p.logger.Error(errors.Wrap(err, "handleMessage failed")) return } // read from s3 object and create event for each log line - err = p.handleS3Objects(svcS3, s3Infos) + err = p.handleS3Objects(svcS3, s3Infos, errC) if err != nil { - errC <- errors.Wrap(err, "handleS3Objects failed") - p.logger.Error(errors.Wrap(err, "handleS3Objects failed")) + err = errors.Wrap(err, "handleS3Objects failed") + errC <- err + p.logger.Error(err) } } @@ -253,6 +267,7 @@ func (p *Input) processorKeepAlive(svcSQS *sqs.Client, message sqs.Message, queu } p.logger.Warnf("Message visibility timeout updated to %v", visibilityTimeout) } else { + p.logger.Debug("ACK done, deleting message from SQS") // only delete sqs message when errC is closed with no error err := p.deleteMessage(queueURL, *message.ReceiptHandle, svcSQS) if err != nil { @@ -314,7 +329,11 @@ func handleSQSMessage(m sqs.Message) ([]s3Info, error) { return s3Infos, nil } -func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info) error { +func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info, errC chan error) error { + s3Context := &s3Context{ + errC: errC, + } + for _, s3Info := range s3Infos { objectHash := s3ObjectHash(s3Info) @@ -335,9 +354,12 @@ func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info) error { if err == io.EOF 
{ // create event for last line offset += len([]byte(log)) - err = p.forwardEvent(createEvent(log, offset, s3Info, objectHash)) + event := createEvent(log, offset, s3Info, objectHash, s3Context) + err = p.forwardEvent(event) if err != nil { - return errors.Wrapf(err, "forwardEvent failed for %v", s3Info.key) + err = errors.Wrapf(err, "forwardEvent failed for %v", s3Info.key) + s3Context.Fail(err) + return err } return nil } @@ -346,9 +368,12 @@ func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info) error { // create event per log line offset += len([]byte(log)) - err = p.forwardEvent(createEvent(log, offset, s3Info, objectHash)) + event := createEvent(log, offset, s3Info, objectHash, s3Context) + err = p.forwardEvent(event) if err != nil { - return errors.Wrapf(err, "forwardEvent failed for %v", s3Info.key) + err = errors.Wrapf(err, "forwardEvent failed for %v", s3Info.key) + s3Context.Fail(err) + return err } } } @@ -392,7 +417,7 @@ func (p *Input) deleteMessage(queueURL string, messagesReceiptHandle string, svc return nil } -func createEvent(log string, offset int, s3Info s3Info, objectHash string) beat.Event { +func createEvent(log string, offset int, s3Info s3Info, objectHash string, s3Context *s3Context) beat.Event { f := common.MapStr{ "message": log, "log": common.MapStr{ @@ -412,10 +437,13 @@ func createEvent(log string, offset int, s3Info s3Info, objectHash string) beat. "region": s3Info.region, }, } + + s3Context.Inc() return beat.Event{ - Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, Timestamp: time.Now(), Fields: f, + Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, + Private: s3Context, } } @@ -430,3 +458,27 @@ func s3ObjectHash(s3Info s3Info) string { prefix := hex.EncodeToString(h.Sum(nil)) return prefix[:10] } + +func (c *s3Context) Fail(err error) { + c.mux.Lock() + defer c.mux.Unlock() + + // only care about the last error for now + // TODO: add "Typed" error to error for context + c.err = err + c.done() +} + +func (c *s3Context) done() { + c.refs-- + if c.refs == 0 { + c.errC <- c.err + close(c.errC) + } +} + +func (c *s3Context) Inc() { + c.mux.Lock() + defer c.mux.Unlock() + c.refs++ +} diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index 9b317281ab1..0e7a9da3cb2 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -127,6 +127,12 @@ func TestBufferedIORead(t *testing.T) { } func TestCreateEvent(t *testing.T) { + errC := make(chan error) + s3Context := &s3Context{ + refs: 1, + errC: errC, + } + mockSvc := &MockS3Client{} s3Info := s3Info{ name: "test-s3-ks", @@ -138,19 +144,19 @@ func TestCreateEvent(t *testing.T) { reader, err := p.bufferedIORead(mockSvc, s3Info) assert.NoError(t, err) - var events []*beat.Event + var events []beat.Event for { log, err := reader.ReadString('\n') if log == "" { break } if err == io.EOF { - event := createEvent(log, len([]byte(log)), s3Info, s3ObjectHash) + event := createEvent(log, len([]byte(log)), s3Info, s3ObjectHash, s3Context) events = append(events, event) break } - event := createEvent(log, len([]byte(log)), s3Info, s3ObjectHash) + event := createEvent(log, len([]byte(log)), s3Info, s3ObjectHash, s3Context) events = append(events, event) } @@ -179,6 +185,8 @@ func TestCreateEvent(t *testing.T) { message2, err := events[1].Fields.GetValue("message") assert.NoError(t, err) assert.Equal(t, s3LogString2, message2.(string)) + + s3Context.done() } func 
TestConstructObjectURL(t *testing.T) { From 59bcd61eb318c35fa2306a9f2774e97b28684f14 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 29 Jul 2019 12:17:02 -0600 Subject: [PATCH 50/59] Ignore aws ErrCodeRequestCanceled --- vendor/vendor.json | 1 + x-pack/filebeat/input/s3/input.go | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/vendor/vendor.json b/vendor/vendor.json index ac34c3e30c5..aa8b29cf12d 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -252,6 +252,7 @@ "path": "github.com/aws/aws-sdk-go-v2/aws/awserr", "revision": "098e15df3044cf1b04a222c1c33c3e6135ac89f3", "revisionTime": "2019-05-28T21:51:27Z", + "tree": true, "version": "v0.9.0", "versionExact": "v0.9.0" }, diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index bd7b9572e2b..91413636682 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -17,6 +17,7 @@ import ( "time" awssdk "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/aws/awserr" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/s3iface" "github.com/aws/aws-sdk-go-v2/service/sqs" @@ -194,6 +195,9 @@ func (p *Input) run(svcSQS *sqs.Client, svcS3 *s3.Client, visibilityTimeout int6 output, err := req.Send(p.context) if err != nil { + if awsErr, ok := err.(awserr.Error); ok && awsErr.Code() == awssdk.ErrCodeRequestCanceled { + continue + } p.logger.Error("failed to receive message from SQS:", err) time.Sleep(time.Duration(waitTimeSecond) * time.Second) continue @@ -443,7 +447,7 @@ func createEvent(log string, offset int, s3Info s3Info, objectHash string, s3Con Timestamp: time.Now(), Fields: f, Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, - Private: s3Context, + Private: s3Context, } } From e0e83d94f51b187662ca9fc859d2a495c9ce7f8c Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Mon, 29 Jul 2019 13:24:51 -0600 Subject: [PATCH 51/59] Run mage fmt update --- x-pack/filebeat/input/s3/input.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 91413636682..3c7bcbd56d0 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -447,7 +447,7 @@ func createEvent(log string, offset int, s3Info s3Info, objectHash string, s3Con Timestamp: time.Now(), Fields: f, Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, - Private: s3Context, + Private: s3Context, } } From 775c6285d31cf8134862bcb0e379d908cb249857 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Tue, 30 Jul 2019 22:32:38 -0600 Subject: [PATCH 52/59] Change ref to start from 1 --- x-pack/filebeat/input/s3/input.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 3c7bcbd56d0..7f4999b3fec 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -44,6 +44,8 @@ var ( // sooner than WaitTimeSeconds. If no messages are available and the wait time // expires, the call returns successfully with an empty list of messages. 
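	// Illustration (an assumption, not code from this patch): with the
	// vendored aws-sdk-go-v2 v0.9 API, such a long poll is issued roughly
	// as follows, where queueURL and maxNumberOfMessage are assumed names:
	//
	//	req := svcSQS.ReceiveMessageRequest(&sqs.ReceiveMessageInput{
	//		QueueUrl:            awssdk.String(queueURL),
	//		MaxNumberOfMessages: awssdk.Int64(maxNumberOfMessage),
	//		WaitTimeSeconds:     awssdk.Int64(waitTimeSecond),
	//	})
	//	output, err := req.Send(ctx) // returns as soon as a message arrives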
waitTimeSecond int64 = 10 + + errOutletClosed = errors.New("input outlet closed") ) func init() { @@ -335,6 +337,7 @@ func handleSQSMessage(m sqs.Message) ([]s3Info, error) { func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info, errC chan error) error { s3Context := &s3Context{ + refs: 1, errC: errC, } @@ -402,7 +405,7 @@ func (p *Input) bufferedIORead(svc s3iface.ClientAPI, s3Info s3Info) (*bufio.Rea func (p *Input) forwardEvent(event beat.Event) error { ok := p.outlet.OnEvent(event) if !ok { - return errors.New("input outlet closed") + return errOutletClosed } return nil } @@ -475,7 +478,7 @@ func (c *s3Context) Fail(err error) { func (c *s3Context) done() { c.refs-- - if c.refs == 0 { + if c.refs == 1 { c.errC <- c.err close(c.errC) } From 6558743fdbf10b6604a214b3324f7f8933b85d84 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Wed, 31 Jul 2019 07:28:21 -0600 Subject: [PATCH 53/59] Change to newS3BucketReader --- x-pack/filebeat/input/s3/input.go | 16 ++++++++++------ x-pack/filebeat/input/s3/input_test.go | 9 +++++---- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 7f4999b3fec..07a763c4dae 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -182,7 +182,7 @@ func (p *Input) Run() { } func (p *Input) run(svcSQS *sqs.Client, svcS3 *s3.Client, visibilityTimeout int64) { - defer p.logger.Infof("S3 input worker for '%v' has stopped.", p.config.QueueURL) + defer p.logger.Infof("s3 input worker for '%v' has stopped.", p.config.QueueURL) p.logger.Infof("s3 input worker has started. with queueURL: %v", p.config.QueueURL) for p.context.Err() == nil { // receive messages from sqs @@ -340,14 +340,15 @@ func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info, errC ch refs: 1, errC: errC, } + defer s3Context.done() for _, s3Info := range s3Infos { objectHash := s3ObjectHash(s3Info) // read from s3 object - reader, err := p.bufferedIORead(svc, s3Info) + reader, err := newS3BucketReader(svc, s3Info, p.context) if err != nil { - return errors.Wrap(err, "bufferedIORead failed") + return errors.Wrap(err, "newS3BucketReader failed") } offset := 0 @@ -387,14 +388,14 @@ func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info, errC ch return nil } -func (p *Input) bufferedIORead(svc s3iface.ClientAPI, s3Info s3Info) (*bufio.Reader, error) { +func newS3BucketReader(svc s3iface.ClientAPI, s3Info s3Info, context *channelContext) (*bufio.Reader, error) { s3GetObjectInput := &s3.GetObjectInput{ Bucket: awssdk.String(s3Info.name), Key: awssdk.String(s3Info.key), } req := svc.GetObjectRequest(s3GetObjectInput) - resp, err := req.Send(p.context) + resp, err := req.Send(context) if err != nil { return nil, errors.Wrapf(err, "s3 get object request failed %v", s3Info.key) } @@ -425,6 +426,9 @@ func (p *Input) deleteMessage(queueURL string, messagesReceiptHandle string, svc } func createEvent(log string, offset int, s3Info s3Info, objectHash string, s3Context *s3Context) beat.Event { + s3Context.Inc() + defer s3Context.done() + f := common.MapStr{ "message": log, "log": common.MapStr{ @@ -478,7 +482,7 @@ func (c *s3Context) Fail(err error) { func (c *s3Context) done() { c.refs-- - if c.refs == 1 { + if c.refs == 0 { c.errC <- c.err close(c.errC) } diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index 0e7a9da3cb2..4c50fd317ea 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ 
b/x-pack/filebeat/input/s3/input_test.go @@ -35,7 +35,6 @@ var ( key: "log2019-06-21-16-16-54", region: "us-west-1", } - p = Input{} ) func (m *MockS3Client) GetObjectRequest(input *s3.GetObjectInput) s3.GetObjectRequest { @@ -105,8 +104,9 @@ func TestHandleMessage(t *testing.T) { } } -func TestBufferedIORead(t *testing.T) { - reader, err := p.bufferedIORead(mockSvc, info) +func TestNewS3BucketReader(t *testing.T) { + p := Input{} + reader, err := newS3BucketReader(mockSvc, info, p.context) assert.NoError(t, err) for i := 0; i < 3; i++ { switch i { @@ -127,6 +127,7 @@ func TestBufferedIORead(t *testing.T) { } func TestCreateEvent(t *testing.T) { + p := Input{} errC := make(chan error) s3Context := &s3Context{ refs: 1, @@ -142,7 +143,7 @@ func TestCreateEvent(t *testing.T) { } s3ObjectHash := s3ObjectHash(s3Info) - reader, err := p.bufferedIORead(mockSvc, s3Info) + reader, err := newS3BucketReader(mockSvc, s3Info, p.context) assert.NoError(t, err) var events []beat.Event for { From 0ce8dc6687de260245aa167477d9be3807f68216 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Wed, 31 Jul 2019 08:49:50 -0600 Subject: [PATCH 54/59] Add waitgroup --- x-pack/filebeat/input/s3/input.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 07a763c4dae..3c5ab0d9dda 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -64,6 +64,7 @@ type Input struct { close chan struct{} workerOnce sync.Once // Guarantees that the worker goroutine is only started once. context *channelContext + workerWg sync.WaitGroup // Waits on s3 worker goroutine. } type s3Info struct { @@ -166,6 +167,7 @@ func NewInput(cfg *common.Config, connector channel.Connector, context input.Con // Run runs the input func (p *Input) Run() { p.workerOnce.Do(func() { + p.workerWg.Add(1) visibilityTimeout := int64(p.config.VisibilityTimeout.Seconds()) regionName, err := getRegionFromQueueURL(p.config.QueueURL) if err != nil { @@ -183,6 +185,7 @@ func (p *Input) Run() { func (p *Input) run(svcSQS *sqs.Client, svcS3 *s3.Client, visibilityTimeout int64) { defer p.logger.Infof("s3 input worker for '%v' has stopped.", p.config.QueueURL) + defer p.workerWg.Done() p.logger.Infof("s3 input worker has started. 
with queueURL: %v", p.config.QueueURL) for p.context.Err() == nil { // receive messages from sqs @@ -219,6 +222,8 @@ func (p *Input) run(svcSQS *sqs.Client, svcS3 *s3.Client, visibilityTimeout int6 func (p *Input) Stop() { defer p.outlet.Close() close(p.close) + p.context.Done() + p.workerWg.Wait() p.logger.Info("Stopping s3 input") } @@ -340,7 +345,6 @@ func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info, errC ch refs: 1, errC: errC, } - defer s3Context.done() for _, s3Info := range s3Infos { objectHash := s3ObjectHash(s3Info) @@ -385,6 +389,8 @@ func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info, errC ch } } } + + s3Context.done() return nil } @@ -397,6 +403,9 @@ func newS3BucketReader(svc s3iface.ClientAPI, s3Info s3Info, context *channelCon resp, err := req.Send(context) if err != nil { + if awsErr, ok := err.(awserr.Error); ok && awsErr.Code() == awssdk.ErrCodeRequestCanceled { + return nil, nil + } return nil, errors.Wrapf(err, "s3 get object request failed %v", s3Info.key) } @@ -420,6 +429,9 @@ func (p *Input) deleteMessage(queueURL string, messagesReceiptHandle string, svc req := svcSQS.DeleteMessageRequest(deleteMessageInput) _, err := req.Send(p.context) if err != nil { + if awsErr, ok := err.(awserr.Error); ok && awsErr.Code() == awssdk.ErrCodeRequestCanceled { + return nil + } return errors.Wrap(err, "DeleteMessageRequest failed") } return nil From 610d854893635d8ee3df5630f69089f65b522555 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Wed, 31 Jul 2019 11:18:03 -0600 Subject: [PATCH 55/59] Add documentation for setting up sqs s3 and manual testing --- .../docs/inputs/input-aws-s3.asciidoc | 23 +++++++++++++++++ x-pack/filebeat/input/s3/input.go | 25 +++++++++---------- x-pack/filebeat/input/s3/input_test.go | 6 ++--- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc index b1f9d8d8e40..1d5fc95ca8f 100644 --- a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc @@ -77,7 +77,30 @@ optional configuration. Please use `Example configuration 2` as an example. If `credential_profile_name` is not specified, then `s3` input will consume credentials from shared AWS credentials file with `default` profile name. +Please see <> for more details about how to configure aws +credentials. + +=== S3 and SQS setup +Enable bucket notification: any new object creation in S3 bucket will also +create a notification through SQS. Please see +https://docs.aws.amazon.com/AmazonS3/latest/dev/ways-to-add-notification-config-to-bucket.html#step1-create-sqs-queue-for-notification[create-sqs-queue-for-notification] +for more details. + +=== manual testing +1. Upload fake log files into the S3 bucket that has SQS notification enabled. +2. Check from SQS if there are N messages received. +3. Start filebeat with `./filebeat -e` and check Kibana if there are events reported +with messages from the example logs. Depends on the number of log lines in each +fake log file, check if the number of events match the number of log lines total +from all log files. +4. Check SQS if messages are deleted successfully. +5. Interrupt the s3 input process by killing filebeat during processing new S3 logs, +check if messages in SQS are in flight instead of deleted. 
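The `Stop`/`Wait` split added to input.go above follows Go's usual
close-then-wait shutdown idiom. A minimal standalone sketch of that pattern,
using only the standard library (all names here are hypothetical, not the
input's actual types):

[source,go]
----
package main

import (
	"fmt"
	"sync"
)

// worker polls for work until its done channel is closed.
type worker struct {
	done chan struct{}
	wg   sync.WaitGroup
}

func (w *worker) run() {
	defer w.wg.Done()
	for {
		select {
		case <-w.done:
			return // shutdown was requested
		default:
			// receive one SQS batch, process it, loop again
		}
	}
}

// start registers with the WaitGroup *before* spawning the goroutine,
// so a concurrent stop() cannot observe a zero counter and return early.
func (w *worker) start() {
	w.wg.Add(1)
	go w.run()
}

// stop signals shutdown and blocks until run() has returned, mirroring
// the patch's Stop (signal) / Wait (join) split.
func (w *worker) stop() {
	close(w.done)
	w.wg.Wait()
}

func main() {
	w := &worker{done: make(chan struct{})}
	w.start()
	w.stop()
	fmt.Println("worker exited cleanly")
}
----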
+ [id="{beatname_lc}-input-{type}-common-options"] include::../../../../filebeat/docs/inputs/input-common-options.asciidoc[] +[id="aws-credentials-config"] +include::{libbeat-xpack-dir}/docs/aws-credentials-config.asciidoc[] + :type!: diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index 3c5ab0d9dda..4197af27ebc 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -10,7 +10,6 @@ import ( "crypto/sha256" "encoding/hex" "encoding/json" - "fmt" "io" "strings" "sync" @@ -65,6 +64,7 @@ type Input struct { workerOnce sync.Once // Guarantees that the worker goroutine is only started once. context *channelContext workerWg sync.WaitGroup // Waits on s3 worker goroutine. + stopOnce sync.Once } type s3Info struct { @@ -167,7 +167,6 @@ func NewInput(cfg *common.Config, connector channel.Connector, context input.Con // Run runs the input func (p *Input) Run() { p.workerOnce.Do(func() { - p.workerWg.Add(1) visibilityTimeout := int64(p.config.VisibilityTimeout.Seconds()) regionName, err := getRegionFromQueueURL(p.config.QueueURL) if err != nil { @@ -179,6 +178,7 @@ func (p *Input) Run() { svcSQS := sqs.New(awsConfig) svcS3 := s3.New(awsConfig) + p.workerWg.Add(1) go p.run(svcSQS, svcS3, visibilityTimeout) }) } @@ -220,16 +220,18 @@ func (p *Input) run(svcSQS *sqs.Client, svcS3 *s3.Client, visibilityTimeout int6 // Stop stops the s3 input func (p *Input) Stop() { - defer p.outlet.Close() - close(p.close) - p.context.Done() - p.workerWg.Wait() - p.logger.Info("Stopping s3 input") + p.stopOnce.Do(func() { + defer p.outlet.Close() + close(p.close) + p.context.Done() + p.logger.Info("Stopping s3 input") + }) } // Wait stops the s3 input. func (p *Input) Wait() { p.Stop() + p.workerWg.Wait() } func (p *Input) processor(queueURL string, messages []sqs.Message, visibilityTimeout int64, svcS3 *s3.Client, svcSQS *sqs.Client) { @@ -345,6 +347,7 @@ func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info, errC ch refs: 1, errC: errC, } + defer s3Context.done() for _, s3Info := range s3Infos { objectHash := s3ObjectHash(s3Info) @@ -390,7 +393,6 @@ func (p *Input) handleS3Objects(svc s3iface.ClientAPI, s3Infos []s3Info, errC ch } } - s3Context.done() return nil } @@ -438,9 +440,6 @@ func (p *Input) deleteMessage(queueURL string, messagesReceiptHandle string, svc } func createEvent(log string, offset int, s3Info s3Info, objectHash string, s3Context *s3Context) beat.Event { - s3Context.Inc() - defer s3Context.done() - f := common.MapStr{ "message": log, "log": common.MapStr{ @@ -465,8 +464,8 @@ func createEvent(log string, offset int, s3Info s3Info, objectHash string, s3Con return beat.Event{ Timestamp: time.Now(), Fields: f, - Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, - Private: s3Context, + // Meta: common.MapStr{"id": objectHash + "-" + fmt.Sprintf("%012d", offset)}, + Private: s3Context, } } diff --git a/x-pack/filebeat/input/s3/input_test.go b/x-pack/filebeat/input/s3/input_test.go index 4c50fd317ea..c51e291b45a 100644 --- a/x-pack/filebeat/input/s3/input_test.go +++ b/x-pack/filebeat/input/s3/input_test.go @@ -105,8 +105,7 @@ func TestHandleMessage(t *testing.T) { } func TestNewS3BucketReader(t *testing.T) { - p := Input{} - reader, err := newS3BucketReader(mockSvc, info, p.context) + reader, err := newS3BucketReader(mockSvc, info, &channelContext{}) assert.NoError(t, err) for i := 0; i < 3; i++ { switch i { @@ -127,7 +126,6 @@ func TestNewS3BucketReader(t *testing.T) { } func TestCreateEvent(t 
*testing.T) {
-	p := Input{}
 	errC := make(chan error)
 	s3Context := &s3Context{
 		refs: 1,
@@ -143,7 +141,7 @@ func TestCreateEvent(t *testing.T) {
 	}
 	s3ObjectHash := s3ObjectHash(s3Info)
 
-	reader, err := newS3BucketReader(mockSvc, s3Info, p.context)
+	reader, err := newS3BucketReader(mockSvc, s3Info, &channelContext{})
 	assert.NoError(t, err)
 	var events []beat.Event
 	for {

From 837f16a2bafc483d4dbdd8078aa259c410146df5 Mon Sep 17 00:00:00 2001
From: kaiyan-sheng
Date: Thu, 1 Aug 2019 15:56:02 -0600
Subject: [PATCH 56/59] move manual testing into a separate doc

---
 .../docs/inputs/input-aws-s3.asciidoc         | 11 ----------
 .../filebeat/input/s3/_meta/s3-input.asciidoc | 21 +++++++++++++++++++
 2 files changed, 21 insertions(+), 11 deletions(-)
 create mode 100644 x-pack/filebeat/input/s3/_meta/s3-input.asciidoc

diff --git a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc
index 1d5fc95ca8f..53ae637e701 100644
--- a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc
+++ b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc
@@ -86,17 +86,6 @@ create a notification through SQS. Please see
 https://docs.aws.amazon.com/AmazonS3/latest/dev/ways-to-add-notification-config-to-bucket.html#step1-create-sqs-queue-for-notification[create-sqs-queue-for-notification]
 for more details.
 
-=== manual testing
-1. Upload fake log files into the S3 bucket that has SQS notification enabled.
-2. Check from SQS if there are N messages received.
-3. Start filebeat with `./filebeat -e` and check Kibana if there are events reported
-with messages from the example logs. Depends on the number of log lines in each
-fake log file, check if the number of events match the number of log lines total
-from all log files.
-4. Check SQS if messages are deleted successfully.
-5. Interrupt the s3 input process by killing filebeat during processing new S3 logs,
-check if messages in SQS are in flight instead of deleted.
-
 [id="{beatname_lc}-input-{type}-common-options"]
 include::../../../../filebeat/docs/inputs/input-common-options.asciidoc[]
 
diff --git a/x-pack/filebeat/input/s3/_meta/s3-input.asciidoc b/x-pack/filebeat/input/s3/_meta/s3-input.asciidoc
new file mode 100644
index 00000000000..9a8e762e1b0
--- /dev/null
+++ b/x-pack/filebeat/input/s3/_meta/s3-input.asciidoc
@@ -0,0 +1,21 @@
+=== S3 and SQS Setup
+Enable bucket notification: any new object creation in S3 bucket will also
+create a notification through SQS. Please see
+https://docs.aws.amazon.com/AmazonS3/latest/dev/ways-to-add-notification-config-to-bucket.html#step1-create-sqs-queue-for-notification[create-sqs-queue-for-notification]
+for more details.
+1. In SQS, edit policy document to create a new policy.
+2. In S3 bucket, enable and configure event notification.
+3. In order to make sure the S3-SQS setup is ready, upload a file into the S3
+bucket and check if SQS gets a message showing that a new object is created with
+its name.
+
+=== Manual Testing
+1. Upload fake log files into the S3 bucket that has SQS notification enabled.
+2. Check from SQS if there are N messages received.
+3. Start filebeat with `./filebeat -e` and check Kibana if there are events reported
+with messages from the example logs. Depending on the number of log lines in each
+fake log file, check if the number of events matches the total number of log
+lines from all log files.
+4. Check SQS if messages are deleted successfully.
+5.
Interrupt the s3 input process by killing filebeat during processing new S3 logs, +check if messages in SQS are in flight instead of deleted. From 13dd71a8f5656d198d40477abbe246552889e381 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Fri, 2 Aug 2019 06:33:28 -0600 Subject: [PATCH 57/59] Add s3 input doc into filebeat-options --- filebeat/docs/filebeat-options.asciidoc | 3 +++ x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc | 9 ++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/filebeat/docs/filebeat-options.asciidoc b/filebeat/docs/filebeat-options.asciidoc index 05c906de990..2f6a021eb69 100644 --- a/filebeat/docs/filebeat-options.asciidoc +++ b/filebeat/docs/filebeat-options.asciidoc @@ -50,6 +50,7 @@ You can configure {beatname_uc} to use the following inputs: * <<{beatname_lc}-input-docker>> * <<{beatname_lc}-input-tcp>> * <<{beatname_lc}-input-syslog>> +* <<{beatname_lc}-input-s3>> * <<{beatname_lc}-input-netflow>> * <<{beatname_lc}-input-google-pubsub>> @@ -70,6 +71,8 @@ include::inputs/input-tcp.asciidoc[] include::inputs/input-syslog.asciidoc[] +include::../../x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc[] + include::../../x-pack/filebeat/docs/inputs/input-netflow.asciidoc[] include::../../x-pack/filebeat/docs/inputs/input-google-pubsub.asciidoc[] diff --git a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc index 53ae637e701..beb05d5874b 100644 --- a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc @@ -1,5 +1,7 @@ [role="xpack"] +:libbeat-xpack-dir: ../../../../x-pack/libbeat + :type: s3 [id="{beatname_lc}-input-{type}"] @@ -30,9 +32,9 @@ Example configuration 1: {beatname_lc}.inputs: - type: s3 queue_url: https://sqs.ap-southeast-1.amazonaws.com/1234/test-s3-queue - access_key_id: '${AWS_ACCESS_KEY_ID:""}' - secret_access_key: '${AWS_SECRET_ACCESS_KEY:""}' - session_token: '${AWS_SESSION_TOKEN:"”}' + access_key_id: '$\{AWS_ACCESS_KEY_ID:"fake-access-key"\}' + secret_access_key: '$\{AWS_SECRET_ACCESS_KEY:"fake-secret-key"\}' + session_token: '$\{AWS_SESSION_TOKEN:"fake-session-token"\}' ---- Example configuration 2: @@ -80,6 +82,7 @@ credentials from shared AWS credentials file with `default` profile name. Please see <> for more details about how to configure aws credentials. +[float] === S3 and SQS setup Enable bucket notification: any new object creation in S3 bucket will also create a notification through SQS. 
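For orientation, the notification SQS delivers for each new object is a small
JSON document; the `sqsMessage` struct shown earlier in the series decodes it.
A hedged sketch of reading just the fields this input needs (struct shape
assumed from the standard S3 event notification format, sample values borrowed
from the series' tests):

[source,go]
----
package main

import (
	"encoding/json"
	"fmt"
)

// s3EventRecords models only the fields of interest; real notifications
// carry many more (eventName, eventSource, timestamps, ...).
type s3EventRecords struct {
	Records []struct {
		AwsRegion string `json:"awsRegion"`
		S3        struct {
			Bucket struct {
				Name string `json:"name"`
			} `json:"bucket"`
			Object struct {
				Key string `json:"key"`
			} `json:"object"`
		} `json:"s3"`
	} `json:"Records"`
}

func main() {
	body := `{"Records":[{"awsRegion":"us-west-1",` +
		`"s3":{"bucket":{"name":"test-s3-ks"},` +
		`"object":{"key":"log2019-06-21-16-16-54"}}}]}`

	var notif s3EventRecords
	if err := json.Unmarshal([]byte(body), &notif); err != nil {
		panic(err)
	}
	for _, r := range notif.Records {
		fmt.Println(r.S3.Bucket.Name, r.S3.Object.Key, r.AwsRegion)
	}
}
----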
Please see From 87be1a944541ca888dfd051ee5a3aa6cd13b0584 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Fri, 2 Aug 2019 07:29:13 -0600 Subject: [PATCH 58/59] rebase and add filebeat-s3-input-config --- .../docs/filebeat-s3-input-config.asciidoc | 31 +++++++++++++++++ .../docs/inputs/input-aws-s3.asciidoc | 34 +++---------------- .../metricbeat/module/aws/_meta/docs.asciidoc | 2 +- 3 files changed, 37 insertions(+), 30 deletions(-) create mode 100644 x-pack/filebeat/docs/filebeat-s3-input-config.asciidoc diff --git a/x-pack/filebeat/docs/filebeat-s3-input-config.asciidoc b/x-pack/filebeat/docs/filebeat-s3-input-config.asciidoc new file mode 100644 index 00000000000..6c28e92cc83 --- /dev/null +++ b/x-pack/filebeat/docs/filebeat-s3-input-config.asciidoc @@ -0,0 +1,31 @@ +* Use AWS credentials in filebeat configuration +[source,yaml] +---- +filebeat.inputs: +- type: s3 + queue_url: https://sqs.us-east-1.amazonaws.com/123/test-queue + access_key_id: '' + secret_access_key: '' + session_token: '' +---- + + or + +[source,yaml] +---- +filebeat.inputs: +- type: s3 + queue_url: https://sqs.us-east-1.amazonaws.com/123/test-queue + access_key_id: '${AWS_ACCESS_KEY_ID:""}' + secret_access_key: '${AWS_SECRET_ACCESS_KEY:""}' + session_token: '${AWS_SESSION_TOKEN:""}' +---- + +* Use shared AWS credentials file +[source,yaml] +---- +filebeat.inputs: +- type: s3 + queue_url: https://sqs.us-east-1.amazonaws.com/123/test-queue + credential_profile_name: test-fb +---- diff --git a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc index beb05d5874b..a2455f12975 100644 --- a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc @@ -25,26 +25,13 @@ does not go back to the queue in the middle of the processing. If there are errors happening during the processing of the s3 object, then the process will be stopped and the sqs message will be returned back to the queue. -Example configuration 1: - ["source","yaml",subs="attributes"] ---- {beatname_lc}.inputs: - type: s3 queue_url: https://sqs.ap-southeast-1.amazonaws.com/1234/test-s3-queue - access_key_id: '$\{AWS_ACCESS_KEY_ID:"fake-access-key"\}' - secret_access_key: '$\{AWS_SECRET_ACCESS_KEY:"fake-secret-key"\}' - session_token: '$\{AWS_SESSION_TOKEN:"fake-session-token"\}' ----- - -Example configuration 2: - -["source","yaml",subs="attributes"] ----- -{beatname_lc}.inputs: -- type: s3 - queue_url: https://sqs.ap-southeast-1.amazonaws.com/1234/test-s3-queue - credential_profile_name: test-s3-input + access_key_id: my-access-key + secret_access_key: my-secret-access-key ---- The `s3` input supports the following configuration options plus the @@ -68,19 +55,8 @@ is 0 seconds. The maximum is 12 hours. [float] ==== `aws credentials` -In order to make AWS API calls, `s3` input requires AWS credentials. Users can -either put the values into the configuration for `access_key_id`, -`secret_access_key` and/or `session_token`, or use environment variable -`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and/or `AWS_SESSION_TOKEN` instead. -Please use `Example configuration 1` as an example. - -Or shared AWS credentials file can be used with `credential_profile_name` as an -optional configuration. Please use `Example configuration 2` as an example. If -`credential_profile_name` is not specified, then `s3` input will consume -credentials from shared AWS credentials file with `default` profile name. - -Please see <> for more details about how to configure aws -credentials. 
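Both credential styles documented here resolve to the same SDK config; the
sketch below shows one plausible wiring, assuming aws-sdk-go-v2 v0.9's
`external` and `aws` packages (`getAWSConfig` and its parameters are invented
names, not the input's actual helper):

[source,go]
----
package awsauth

import (
	awssdk "github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/aws/external"
)

// getAWSConfig prefers keys given explicitly in the input config and
// otherwise keeps the SDK default chain: environment variables, the
// shared credentials file (honoring a profile name), and so on.
func getAWSConfig(accessKeyID, secretAccessKey, sessionToken string) (awssdk.Config, error) {
	cfg, err := external.LoadDefaultAWSConfig()
	if err != nil {
		return awssdk.Config{}, err
	}
	if accessKeyID != "" && secretAccessKey != "" {
		cfg.Credentials = awssdk.NewStaticCredentialsProvider(
			accessKeyID, secretAccessKey, sessionToken)
	}
	return cfg, nil
}
----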
+In order to make AWS API calls, `s3` input requires AWS credentials. Please see
+<<aws-credentials-config>> for more details.
 
 [float]
 === S3 and SQS setup
 Enable bucket notification: any new object creation in S3 bucket will also
 create a notification through SQS.
 for more details.
 
 include::../../../../filebeat/docs/inputs/input-common-options.asciidoc[]
 
 [id="aws-credentials-config"]
-include::{libbeat-xpack-dir}/docs/aws-credentials-config.asciidoc[]
+include::../filebeat-s3-input-config.asciidoc[]
 
 :type!:
diff --git a/x-pack/metricbeat/module/aws/_meta/docs.asciidoc b/x-pack/metricbeat/module/aws/_meta/docs.asciidoc
index 44776dba250..fe13b359be0 100644
--- a/x-pack/metricbeat/module/aws/_meta/docs.asciidoc
+++ b/x-pack/metricbeat/module/aws/_meta/docs.asciidoc
@@ -81,4 +81,4 @@ for a list of AWS services that publish metrics to CloudWatch.
 dimensions to Amazon CloudWatch every minute.
 
 [id="aws-credentials-config"]
-include::{libbeat-xpack-dir}/docs/aws-credentials-config.asciidoc[]
+include::../../../../../metricbeat/docs/aws-credentials-examples.asciidoc[]

From e132c19a372234da6f22bc7b9730ce33798e7ed1 Mon Sep 17 00:00:00 2001
From: kaiyan-sheng
Date: Fri, 2 Aug 2019 08:30:58 -0600
Subject: [PATCH 59/59] Fix link in metricbeat doc

---
 metricbeat/docs/modules/aws.asciidoc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metricbeat/docs/modules/aws.asciidoc b/metricbeat/docs/modules/aws.asciidoc
index 74670409aaa..cd3f8dc7748 100644
--- a/metricbeat/docs/modules/aws.asciidoc
+++ b/metricbeat/docs/modules/aws.asciidoc
@@ -88,7 +88,7 @@ for a list of AWS services that publish metrics to CloudWatch.
 dimensions to Amazon CloudWatch every minute.
 
 [id="aws-credentials-config"]
-include::{libbeat-xpack-dir}/docs/aws-credentials-config.asciidoc[]
+include::../../../../../metricbeat/docs/aws-credentials-examples.asciidoc[]
 
 [float]
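Taken together, the ACK patches in this series add up to one small mechanism:
the object reader holds a reference, every published event adds one, the
publisher's ACK callback releases one, and when the count drains to zero the
first error (nil on success) is delivered exactly once, signalling that the
SQS message can be deleted. A standalone sketch of that lifecycle, using only
the standard library (names are invented, and the sketch locks consistently on
every mutation):

[source,go]
----
package main

import (
	"fmt"
	"sync"
)

// ackContext mirrors the series' s3Context: it starts with one reference
// held by the object reader and gains one per published event.
type ackContext struct {
	mu   sync.Mutex
	refs int
	err  error
	errC chan error
}

func newAckContext(errC chan error) *ackContext {
	return &ackContext{refs: 1, errC: errC} // ref #1: the object reader
}

func (c *ackContext) inc() {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.refs++
}

// fail records the first error seen and drops the caller's reference.
func (c *ackContext) fail(err error) {
	c.mu.Lock()
	c.err = err
	c.mu.Unlock()
	c.release()
}

// release drops one reference; the last release reports the outcome once
// and closes the channel so the SQS keep-alive loop can stop.
func (c *ackContext) release() {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.refs--
	if c.refs == 0 {
		c.errC <- c.err
		close(c.errC)
	}
}

func main() {
	errC := make(chan error, 1) // buffered so the final release never blocks
	ctx := newAckContext(errC)

	ctx.inc()     // an event was published
	ctx.release() // ...and later ACKed by the output
	ctx.release() // the reader finished the object

	if err := <-errC; err == nil {
		fmt.Println("all events acked; safe to delete the SQS message")
	}
}
----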