From 5730900a7f5dd70ed82916398f4711d81297a203 Mon Sep 17 00:00:00 2001 From: Raul Martinez Date: Tue, 24 Mar 2026 12:39:22 +0100 Subject: [PATCH 1/2] [extension/awslogsencodingextension] fix s3access parser: add source_region field and forward-compat for unknown fields AWS added source_region as the 27th field in S3 server access logs. The parser previously failed with "values in log line exceed the number of available fields" when it encountered 27+ fields. - Add fieldIndexSourceRegion (26) mapped to aws.s3.source_region - Skip fields beyond the known schema instead of erroring, so future AWS additions do not break the parser - Update test data and expected output accordingly --- ...fix-s3-access-log-source-region-field.yaml | 17 ++++ .../unmarshaler/s3-access-log/fields.go | 3 + .../testdata/too_many_values.log | 2 +- .../testdata/too_many_values_expected.yaml | 89 +++++++++++++++++++ .../testdata/valid_s3_access_log.log | 2 +- .../valid_s3_access_log_expected.yaml | 3 + .../unmarshaler/s3-access-log/unmarshaler.go | 14 +-- .../s3-access-log/unmarshaler_test.go | 6 +- 8 files changed, 127 insertions(+), 9 deletions(-) create mode 100644 .chloggen/fix-s3-access-log-source-region-field.yaml create mode 100644 extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/testdata/too_many_values_expected.yaml diff --git a/.chloggen/fix-s3-access-log-source-region-field.yaml b/.chloggen/fix-s3-access-log-source-region-field.yaml new file mode 100644 index 0000000000000..c2c085af74011 --- /dev/null +++ b/.chloggen/fix-s3-access-log-source-region-field.yaml @@ -0,0 +1,17 @@ +change_type: bug_fix + +component: extension/awslogsencodingextension + +note: Add `source_region` field (27th field) to S3 server access log parser and skip unknown future fields gracefully. + +issues: [47149] + +subtext: | + AWS added a `source_region` field to the S3 server access log format. The parser + previously returned an error ("values in log line exceed the number of available fields") + when it encountered log lines with more fields than defined. This fix: + - Adds `source_region` as field index 26 mapped to `aws.s3.source_region`. + - Makes the parser skip any fields beyond the known schema instead of failing, + providing forward compatibility with future AWS S3 access log additions. + +change_logs: [user] diff --git a/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/fields.go b/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/fields.go index daaff48d15bd2..a1f9bba0f58c0 100644 --- a/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/fields.go +++ b/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/fields.go @@ -14,6 +14,7 @@ const ( attributeAWSS3ObjectSize = "aws.s3.object.size" attributeAWSS3TurnAroundTime = "aws.s3.turn_around_time" attributeAWSS3AclRequired = "aws.s3.acl_required" + attributeAWSS3SourceRegion = "aws.s3.source_region" fieldIndexS3BucketOwner = 0 fieldIndexS3BucketName = 1 @@ -41,6 +42,7 @@ const ( fieldIndexTLSVersion = 23 fieldIndexAccessPointARN = 24 fieldIndexACLRequired = 25 + fieldIndexSourceRegion = 26 ) // Some of the attribute names are based on semantic conventions for AWS S3. @@ -75,4 +77,5 @@ var attributeNames = [...]string{ fieldIndexTLSVersion: string(conventions.TLSProtocolVersionKey), // TLS version fieldIndexAccessPointARN: "aws.s3.access_point.arn", // access point ARN fieldIndexACLRequired: attributeAWSS3AclRequired, // acl required + fieldIndexSourceRegion: attributeAWSS3SourceRegion, // source region } diff --git a/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/testdata/too_many_values.log b/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/testdata/too_many_values.log index b56beb3bbd254..11a035038a2de 100644 --- a/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/testdata/too_many_values.log +++ b/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/testdata/too_many_values.log @@ -1 +1 @@ -79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be amzn-s3-demo-bucket1 [06/Feb/2019:00:00:38 +0000] 192.0.2.3 79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be 3E57427F3EXAMPLE REST.GET.VERSIONING - "GET /amzn-s3-demo-bucket1?versioning HTTP/1.1" 200 - 113 - 7 - "-" "S3Console/0.4" - s9lzHYrFp76ZVxRcpX9+5cjAnEH2ROuNkd2BHfIa6UkFVdtjf5mKR3/eTPFvsiP/XV/VLi31234= SigV4 ECDHE-RSA-AES128-GCM-SHA256 AuthHeader amzn-s3-demo-bucket1.s3.us-west-1.amazonaws.com TLSV1.2 arn:aws:s3:us-west-1:123456789012:accesspoint/example-AP Yes TooMany \ No newline at end of file +79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be amzn-s3-demo-bucket1 [06/Feb/2019:00:00:38 +0000] 192.0.2.3 79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be 3E57427F3EXAMPLE REST.GET.VERSIONING - "GET /amzn-s3-demo-bucket1?versioning HTTP/1.1" 200 - 113 - 7 - "-" "S3Console/0.4" - s9lzHYrFp76ZVxRcpX9+5cjAnEH2ROuNkd2BHfIa6UkFVdtjf5mKR3/eTPFvsiP/XV/VLi31234= SigV4 ECDHE-RSA-AES128-GCM-SHA256 AuthHeader amzn-s3-demo-bucket1.s3.us-west-1.amazonaws.com TLSV1.2 arn:aws:s3:us-west-1:123456789012:accesspoint/example-AP Yes us-east-1 ExtraUnknownField diff --git a/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/testdata/too_many_values_expected.yaml b/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/testdata/too_many_values_expected.yaml new file mode 100644 index 0000000000000..c84006b6b7a41 --- /dev/null +++ b/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/testdata/too_many_values_expected.yaml @@ -0,0 +1,89 @@ +resourceLogs: + - resource: + attributes: + - key: cloud.provider + value: + stringValue: aws + - key: aws.s3.bucket + value: + stringValue: amzn-s3-demo-bucket1 + - key: aws.s3.owner + value: + stringValue: 79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be + scopeLogs: + - logRecords: + - attributes: + - key: source.address + value: + stringValue: 192.0.2.3 + - key: user.id + value: + stringValue: 79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be + - key: aws.request_id + value: + stringValue: 3E57427F3EXAMPLE + - key: rpc.method + value: + stringValue: REST.GET.VERSIONING + - key: http.request.method + value: + stringValue: GET + - key: url.path + value: + stringValue: /amzn-s3-demo-bucket1 + - key: url.query + value: + stringValue: versioning + - key: network.protocol.name + value: + stringValue: http + - key: network.protocol.version + value: + stringValue: "1.1" + - key: http.response.status_code + value: + intValue: "200" + - key: http.response.body.size + value: + intValue: "113" + - key: duration + value: + intValue: "7" + - key: user_agent.original + value: + stringValue: S3Console/0.4 + - key: aws.extended_request_id + value: + stringValue: s9lzHYrFp76ZVxRcpX9+5cjAnEH2ROuNkd2BHfIa6UkFVdtjf5mKR3/eTPFvsiP/XV/VLi31234= + - key: aws.signature.version + value: + stringValue: SigV4 + - key: tls.cipher + value: + stringValue: ECDHE-RSA-AES128-GCM-SHA256 + - key: aws.s3.auth_type + value: + stringValue: AuthHeader + - key: http.request.header.host + value: + stringValue: amzn-s3-demo-bucket1.s3.us-west-1.amazonaws.com + - key: tls.protocol.version + value: + stringValue: "1.2" + - key: aws.s3.access_point.arn + value: + stringValue: arn:aws:s3:us-west-1:123456789012:accesspoint/example-AP + - key: aws.s3.acl_required + value: + boolValue: true + - key: aws.s3.source_region + value: + stringValue: us-east-1 + body: {} + timeUnixNano: "1549411238000000000" + scope: + attributes: + - key: encoding.format + value: + stringValue: aws.s3access + name: github.com/open-telemetry/opentelemetry-collector-contrib/extension/encoding/awslogsencodingextension diff --git a/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/testdata/valid_s3_access_log.log b/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/testdata/valid_s3_access_log.log index bfb059de0c546..6055d937766d6 100644 --- a/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/testdata/valid_s3_access_log.log +++ b/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/testdata/valid_s3_access_log.log @@ -1 +1 @@ -79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be amzn-s3-demo-bucket1 [06/Feb/2019:00:00:38 +0000] 192.0.2.3 79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be 3E57427F3EXAMPLE REST.GET.VERSIONING - "GET /amzn-s3-demo-bucket1?versioning HTTP/1.1" 200 - 113 - 7 - "-" "S3Console/0.4" - s9lzHYrFp76ZVxRcpX9+5cjAnEH2ROuNkd2BHfIa6UkFVdtjf5mKR3/eTPFvsiP/XV/VLi31234= SigV4 ECDHE-RSA-AES128-GCM-SHA256 AuthHeader amzn-s3-demo-bucket1.s3.us-west-1.amazonaws.com TLSV1.2 arn:aws:s3:us-west-1:123456789012:accesspoint/example-AP Yes \ No newline at end of file +79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be amzn-s3-demo-bucket1 [06/Feb/2019:00:00:38 +0000] 192.0.2.3 79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be 3E57427F3EXAMPLE REST.GET.VERSIONING - "GET /amzn-s3-demo-bucket1?versioning HTTP/1.1" 200 - 113 - 7 - "-" "S3Console/0.4" - s9lzHYrFp76ZVxRcpX9+5cjAnEH2ROuNkd2BHfIa6UkFVdtjf5mKR3/eTPFvsiP/XV/VLi31234= SigV4 ECDHE-RSA-AES128-GCM-SHA256 AuthHeader amzn-s3-demo-bucket1.s3.us-west-1.amazonaws.com TLSV1.2 arn:aws:s3:us-west-1:123456789012:accesspoint/example-AP Yes us-east-1 diff --git a/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/testdata/valid_s3_access_log_expected.yaml b/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/testdata/valid_s3_access_log_expected.yaml index e0829a6be5100..c84006b6b7a41 100644 --- a/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/testdata/valid_s3_access_log_expected.yaml +++ b/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/testdata/valid_s3_access_log_expected.yaml @@ -76,6 +76,9 @@ resourceLogs: - key: aws.s3.acl_required value: boolValue: true + - key: aws.s3.source_region + value: + stringValue: us-east-1 body: {} timeUnixNano: "1549411238000000000" scope: diff --git a/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/unmarshaler.go b/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/unmarshaler.go index e73bf192396ad..29500a6f64519 100644 --- a/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/unmarshaler.go +++ b/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/unmarshaler.go @@ -176,10 +176,6 @@ func handleLog(resourceAttr *resourceAttributes, scopeLogs plog.ScopeLogs, log s var value string var err error for i = 0; remaining != ""; i++ { - if i >= len(attributeNames) { - return errors.New("values in log line exceed the number of available fields") - } - value, remaining, err = scanField(remaining) if err != nil { if errors.Is(err, io.EOF) { @@ -188,6 +184,14 @@ func handleLog(resourceAttr *resourceAttributes, scopeLogs plog.ScopeLogs, log s return err } + if i >= len(attributeNames) { + // Skip unknown fields for forward compatibility with future AWS S3 + // access log format additions. AWS may append new fields without a + // format version bump. + // See https://docs.aws.amazon.com/AmazonS3/latest/userguide/LogFormat.html. + continue + } + if value == unknownField && i != fieldIndexACLRequired { // acl required field can be '-' to indicate that no ACL was required continue @@ -207,7 +211,7 @@ func handleLog(resourceAttr *resourceAttributes, scopeLogs plog.ScopeLogs, log s } } - if i != fieldIndexACLRequired+1 { + if i < fieldIndexACLRequired+1 { return errors.New("values in log line are less than the number of available fields") } diff --git a/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/unmarshaler_test.go b/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/unmarshaler_test.go index f5e61db4b739f..74e497a0ac0fd 100644 --- a/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/unmarshaler_test.go +++ b/extension/encoding/awslogsencodingextension/internal/unmarshaler/s3-access-log/unmarshaler_test.go @@ -200,8 +200,10 @@ func TestUnmarshalLogs(t *testing.T) { expectedErr: "values in log line are less than the number of available fields", }, "too_many_values": { - logFilename: "too_many_values.log", - expectedErr: "values in log line exceed the number of available fields", + // Extra fields beyond the known schema are silently skipped for + // forward compatibility with future AWS S3 access log additions. + logFilename: "too_many_values.log", + expectedFilename: "too_many_values_expected.yaml", }, } From 90b9c6924c88cb3fa3860f061b0b65627080f2fb Mon Sep 17 00:00:00 2001 From: Andrew Wilkins Date: Thu, 2 Apr 2026 10:58:29 +0800 Subject: [PATCH 2/2] Update .chloggen/fix-s3-access-log-source-region-field.yaml --- .chloggen/fix-s3-access-log-source-region-field.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.chloggen/fix-s3-access-log-source-region-field.yaml b/.chloggen/fix-s3-access-log-source-region-field.yaml index c2c085af74011..c337f47c87df8 100644 --- a/.chloggen/fix-s3-access-log-source-region-field.yaml +++ b/.chloggen/fix-s3-access-log-source-region-field.yaml @@ -1,6 +1,6 @@ change_type: bug_fix -component: extension/awslogsencodingextension +component: extension/aws_logs_encoding note: Add `source_region` field (27th field) to S3 server access log parser and skip unknown future fields gracefully.