Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

r/aws_s3_bucket: Speed up deletion #24020

Merged
merged 23 commits into from
Apr 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
4485e12
The start of some ideas around improving bucket emptying.
ewbankkit Mar 31, 2022
d8cc0e6
Ignore any object versions that do not have the required Key.
ewbankkit Mar 31, 2022
658f663
Add 'deleteDeleteMarkerListIterator'.
ewbankkit Mar 31, 2022
d69ce4b
Add 'bypassGovernanceRetention' flag.
ewbankkit Mar 31, 2022
4cf860d
Start implementing 'emptyBucket'.
ewbankkit Mar 31, 2022
7d97ae5
Drop use of 's3manager.BatchDelete'.
ewbankkit Apr 2, 2022
ff9fd29
Tweak error messages.
ewbankkit Apr 4, 2022
0c0f977
Simple Python script to populate a versioned S3 bucket with objects.
ewbankkit Apr 4, 2022
9252e2d
Make 'EmptyBucket' public so that it can be tested.
ewbankkit Apr 4, 2022
cdab1b8
Test harness for the EmptyBucket function.
ewbankkit Apr 4, 2022
1c7a86a
Add ability to lock an S3 object.
ewbankkit Apr 4, 2022
e3e05dc
Attempt to remove legal hold from locked S3 objects.
ewbankkit Apr 4, 2022
1f5f443
Attempt to delete the object once the legal hold has been removed.
ewbankkit Apr 4, 2022
04ff2d6
r/aws_s3_bucket: Use 'EmptyBucket' to speed up 'force_destroy'.
ewbankkit Apr 4, 2022
3668084
Add CHANGELOG entry.
ewbankkit Apr 4, 2022
10b819c
Return the number of deleted S3 objects from 'EmptyBucket'.
ewbankkit Apr 4, 2022
66a0d54
Return the number of deleted S3 objects from 'DeleteAllObjectVersions'.
ewbankkit Apr 4, 2022
23a6d36
Fix S3 sweeper.
ewbankkit Apr 5, 2022
c92fe35
Use 'go test' to test 'EmptyBucket'.
ewbankkit Apr 5, 2022
731c697
Add 'TestDeleteAllObjectVersions'.
ewbankkit Apr 5, 2022
74fc23d
Skip 's3.ErrCodeNoSuchKey' errors when emptying an S3 bucket.
ewbankkit Apr 5, 2022
3b2c9b4
Minor tweak.
ewbankkit Apr 5, 2022
2ebac57
Update 24020.txt
ewbankkit Apr 5, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/24020.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:enhancement
resource/aws_s3_bucket: Speed up resource deletion, especially when the S3 buckets contains a large number of objects and `force_destroy` is `true`
```
31 changes: 17 additions & 14 deletions internal/service/s3/bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
"github.com/hashicorp/aws-sdk-go-base/v2/awsv1shim/v2/tfawserr"
"github.com/hashicorp/terraform-plugin-sdk/v2/diag"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/structure"
Expand All @@ -32,10 +33,11 @@ import (

func ResourceBucket() *schema.Resource {
return &schema.Resource{
Create: resourceBucketCreate,
Read: resourceBucketRead,
Update: resourceBucketUpdate,
Delete: resourceBucketDelete,
Create: resourceBucketCreate,
Read: resourceBucketRead,
Update: resourceBucketUpdate,
DeleteWithoutTimeout: resourceBucketDelete,

Importer: &schema.ResourceImporter{
State: schema.ImportStatePassthrough,
},
Expand Down Expand Up @@ -1404,27 +1406,27 @@ func resourceBucketRead(d *schema.ResourceData, meta interface{}) error {
return nil
}

func resourceBucketDelete(d *schema.ResourceData, meta interface{}) error {
func resourceBucketDelete(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics {
conn := meta.(*conns.AWSClient).S3Conn

log.Printf("[DEBUG] S3 Delete Bucket: %s", d.Id())
_, err := conn.DeleteBucket(&s3.DeleteBucketInput{
log.Printf("[DEBUG] Deleting S3 Bucket: %s", d.Id())
_, err := conn.DeleteBucketWithContext(ctx, &s3.DeleteBucketInput{
Bucket: aws.String(d.Id()),
})

if tfawserr.ErrCodeEquals(err, s3.ErrCodeNoSuchBucket) {
return nil
}

if tfawserr.ErrCodeEquals(err, "BucketNotEmpty") {
if tfawserr.ErrCodeEquals(err, ErrCodeBucketNotEmpty) {
if d.Get("force_destroy").(bool) {
// Use a S3 service client that can handle multiple slashes in URIs.
// While aws_s3_object resources cannot create these object
// keys, other AWS services and applications using the S3 Bucket can.
conn = meta.(*conns.AWSClient).S3ConnURICleaningDisabled

// bucket may have things delete them
log.Printf("[DEBUG] S3 Bucket attempting to forceDestroy %+v", err)
log.Printf("[DEBUG] S3 Bucket attempting to forceDestroy %s", err)

// Delete everything including locked objects.
// Don't ignore any object errors or we could recurse infinitely.
Expand All @@ -1433,19 +1435,20 @@ func resourceBucketDelete(d *schema.ResourceData, meta interface{}) error {
if objectLockConfiguration != nil {
objectLockEnabled = aws.StringValue(objectLockConfiguration.ObjectLockEnabled) == s3.ObjectLockEnabledEnabled
}
err = DeleteAllObjectVersions(conn, d.Id(), "", objectLockEnabled, false)

if err != nil {
return fmt.Errorf("error S3 Bucket force_destroy: %s", err)
if n, err := EmptyBucket(ctx, conn, d.Id(), objectLockEnabled); err != nil {
return diag.Errorf("emptying S3 Bucket (%s): %s", d.Id(), err)
} else {
log.Printf("[DEBUG] Deleted %d S3 objects", n)
}

// this line recurses until all objects are deleted or an error is returned
return resourceBucketDelete(d, meta)
return resourceBucketDelete(ctx, d, meta)
}
}

if err != nil {
return fmt.Errorf("error deleting S3 Bucket (%s): %s", d.Id(), err)
return diag.Errorf("deleting S3 Bucket (%s): %s", d.Id(), err)
}

return nil
Expand Down
2 changes: 1 addition & 1 deletion internal/service/s3/bucket_object.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ func resourceBucketObjectDelete(d *schema.ResourceData, meta interface{}) error

var err error
if _, ok := d.GetOk("version_id"); ok {
err = DeleteAllObjectVersions(conn, bucket, key, d.Get("force_destroy").(bool), false)
_, err = DeleteAllObjectVersions(conn, bucket, key, d.Get("force_destroy").(bool), false)
} else {
err = deleteS3ObjectVersion(conn, bucket, key, "", false)
}
Expand Down
240 changes: 240 additions & 0 deletions internal/service/s3/delete.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
package s3

import (
"context"
"fmt"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/hashicorp/aws-sdk-go-base/v2/awsv1shim/v2/tfawserr"
multierror "github.com/hashicorp/go-multierror"
)

// EmptyBucket empties the specified S3 bucket by deleting all object versions and delete markers.
// If `force` is `true` then S3 Object Lock governance mode restrictions are bypassed and
// an attempt is made to remove any S3 Object Lock legal holds.
// Returns the number of objects deleted.
func EmptyBucket(ctx context.Context, conn *s3.S3, bucket string, force bool) (int64, error) {
nObjects, err := forEachObjectVersionsPage(ctx, conn, bucket, func(ctx context.Context, conn *s3.S3, bucket string, page *s3.ListObjectVersionsOutput) (int64, error) {
return deletePageOfObjectVersions(ctx, conn, bucket, force, page)
})

if err != nil {
return nObjects, err
}

n, err := forEachObjectVersionsPage(ctx, conn, bucket, deletePageOfDeleteMarkers)
nObjects += n

if err != nil {
return nObjects, err
}

return nObjects, nil
}

// forEachObjectVersionsPage calls the specified function for each page returned from the S3 ListObjectVersionsPages API.
func forEachObjectVersionsPage(ctx context.Context, conn *s3.S3, bucket string, fn func(ctx context.Context, conn *s3.S3, bucket string, page *s3.ListObjectVersionsOutput) (int64, error)) (int64, error) {
var nObjects int64

input := &s3.ListObjectVersionsInput{
Bucket: aws.String(bucket),
}
var lastErr error

err := conn.ListObjectVersionsPagesWithContext(ctx, input, func(page *s3.ListObjectVersionsOutput, lastPage bool) bool {
if page == nil {
return !lastPage
}

n, err := fn(ctx, conn, bucket, page)
nObjects += n

if err != nil {
lastErr = err

return false
}

return !lastPage
})

if err != nil {
return nObjects, fmt.Errorf("listing S3 Bucket (%s) object versions: %w", bucket, err)
}

if lastErr != nil {
return nObjects, lastErr
}

return nObjects, nil
}

// deletePageOfObjectVersions deletes a page (<= 1000) of S3 object versions.
// If `force` is `true` then S3 Object Lock governance mode restrictions are bypassed and
// an attempt is made to remove any S3 Object Lock legal holds.
// Returns the number of objects deleted.
func deletePageOfObjectVersions(ctx context.Context, conn *s3.S3, bucket string, force bool, page *s3.ListObjectVersionsOutput) (int64, error) {
var nObjects int64

toDelete := make([]*s3.ObjectIdentifier, 0, len(page.Versions))
for _, v := range page.Versions {
toDelete = append(toDelete, &s3.ObjectIdentifier{
Key: v.Key,
VersionId: v.VersionId,
})
}

if nObjects = int64(len(toDelete)); nObjects == 0 {
return nObjects, nil
}

input := &s3.DeleteObjectsInput{
Bucket: aws.String(bucket),
Delete: &s3.Delete{
Objects: toDelete,
Quiet: aws.Bool(true), // Only report errors.
},
}

if force {
input.BypassGovernanceRetention = aws.Bool(true)
}

output, err := conn.DeleteObjectsWithContext(ctx, input)

if tfawserr.ErrCodeEquals(err, s3.ErrCodeNoSuchBucket) {
return nObjects, nil
}

if err != nil {
return nObjects, fmt.Errorf("deleting S3 Bucket (%s) objects: %w", bucket, err)
}

nObjects -= int64(len(output.Errors))

var deleteErrs *multierror.Error

for _, v := range output.Errors {
code := aws.StringValue(v.Code)

if code == s3.ErrCodeNoSuchKey {
continue
}

// Attempt to remove any legal hold on the object.
if force && code == ErrCodeAccessDenied {
key := aws.StringValue(v.Key)
versionID := aws.StringValue(v.VersionId)

_, err := conn.PutObjectLegalHoldWithContext(ctx, &s3.PutObjectLegalHoldInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
VersionId: aws.String(versionID),
LegalHold: &s3.ObjectLockLegalHold{
Status: aws.String(s3.ObjectLockLegalHoldStatusOff),
},
})

if err != nil {
// Add the original error and the new error.
deleteErrs = multierror.Append(deleteErrs, newDeleteS3ObjectVersionError(v))
deleteErrs = multierror.Append(deleteErrs, fmt.Errorf("removing legal hold: %w", newS3ObjectVersionError(key, versionID, err)))
} else {
// Attempt to delete the object once the legal hold has been removed.
_, err := conn.DeleteObjectWithContext(ctx, &s3.DeleteObjectInput{
Bucket: aws.String(bucket),
Key: aws.String(key),
VersionId: aws.String(versionID),
})

if err != nil {
deleteErrs = multierror.Append(deleteErrs, fmt.Errorf("deleting: %w", newS3ObjectVersionError(key, versionID, err)))
} else {
nObjects++
}
}
} else {
deleteErrs = multierror.Append(deleteErrs, newDeleteS3ObjectVersionError(v))
}
}

if err := deleteErrs.ErrorOrNil(); err != nil {
return nObjects, fmt.Errorf("deleting S3 Bucket (%s) objects: %w", bucket, err)
}

return nObjects, nil
}

// deletePageOfDeleteMarkers deletes a page (<= 1000) of S3 object delete markers.
// Returns the number of delete markers deleted.
func deletePageOfDeleteMarkers(ctx context.Context, conn *s3.S3, bucket string, page *s3.ListObjectVersionsOutput) (int64, error) {
var nObjects int64

toDelete := make([]*s3.ObjectIdentifier, 0, len(page.Versions))
for _, v := range page.DeleteMarkers {
toDelete = append(toDelete, &s3.ObjectIdentifier{
Key: v.Key,
VersionId: v.VersionId,
})
}

if nObjects = int64(len(toDelete)); nObjects == 0 {
return nObjects, nil
}

input := &s3.DeleteObjectsInput{
Bucket: aws.String(bucket),
Delete: &s3.Delete{
Objects: toDelete,
Quiet: aws.Bool(true), // Only report errors.
},
}

output, err := conn.DeleteObjectsWithContext(ctx, input)

if tfawserr.ErrCodeEquals(err, s3.ErrCodeNoSuchBucket) {
return nObjects, nil
}

if err != nil {
return nObjects, fmt.Errorf("deleting S3 Bucket (%s) delete markers: %w", bucket, err)
}

nObjects -= int64(len(output.Errors))

var deleteErrs *multierror.Error

for _, v := range output.Errors {
deleteErrs = multierror.Append(deleteErrs, newDeleteS3ObjectVersionError(v))
}

if err := deleteErrs.ErrorOrNil(); err != nil {
return nObjects, fmt.Errorf("deleting S3 Bucket (%s) delete markers: %w", bucket, err)
}

return nObjects, nil
}

func newS3ObjectVersionError(key, versionID string, err error) error {
if err == nil {
return nil
}

if versionID == "" {
return fmt.Errorf("S3 object (%s): %w", key, err)
}

return fmt.Errorf("S3 object (%s) version (%s): %w", key, versionID, err)
}

func newDeleteS3ObjectVersionError(err *s3.Error) error {
if err == nil {
return nil
}

awsErr := awserr.New(aws.StringValue(err.Code), aws.StringValue(err.Message), nil)

return fmt.Errorf("deleting: %w", newS3ObjectVersionError(aws.StringValue(err.Key), aws.StringValue(err.VersionId), awsErr))
}
51 changes: 51 additions & 0 deletions internal/service/s3/delete_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package s3_test

import (
"context"
"flag"
"testing"

"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
tfs3 "github.com/hashicorp/terraform-provider-aws/internal/service/s3"
)

// AWS_REGION=us-west-2 go test -v ./internal/service/s3 -run=TestEmptyBucket -b ewbankkit-test-empty-bucket-001 -f

var bucket = flag.String("b", "", "bucket")
var force = flag.Bool("f", false, "force")

func TestEmptyBucket(t *testing.T) {
if *bucket == "" {
t.Skip("bucket not specified")
}

sess := session.Must(session.NewSession())
svc := s3.New(sess)
ctx := context.Background()

n, err := tfs3.EmptyBucket(ctx, svc, *bucket, *force)

if err != nil {
t.Fatalf("error emptying S3 bucket (%s): %s", *bucket, err)
}

t.Logf("%d S3 objects deleted", n)
}

func TestDeleteAllObjectVersions(t *testing.T) {
if *bucket == "" {
t.Skip("bucket not specified")
}

sess := session.Must(session.NewSession())
svc := s3.New(sess)

n, err := tfs3.DeleteAllObjectVersions(svc, *bucket, "", *force, false)

if err != nil {
t.Fatalf("error emptying S3 bucket (%s): %s", *bucket, err)
}

t.Logf("%d S3 objects deleted", n)
}
2 changes: 2 additions & 0 deletions internal/service/s3/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ package s3
// https://docs.aws.amazon.com/sdk-for-go/api/service/s3/#pkg-constants

const (
ErrCodeAccessDenied = "AccessDenied"
ErrCodeBucketNotEmpty = "BucketNotEmpty"
ErrCodeInvalidBucketState = "InvalidBucketState"
ErrCodeInvalidRequest = "InvalidRequest"
ErrCodeMalformedPolicy = "MalformedPolicy"
Expand Down
Loading