Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
782ef31
tfvars: export config type
r4f4 Oct 24, 2023
8f5c083
pkg/infra/aws: add AWS SDK provisioning skeleton
r4f4 Oct 27, 2023
1d345f9
CORS-2885: infra/aws: create VPC with minimal customizations
r4f4 Oct 27, 2023
160c497
infra/aws: add unit tests for VPC creation
r4f4 Oct 27, 2023
380fcf0
infra/aws: create load balancers
r4f4 Oct 27, 2023
7181aa1
infra/aws: unit tests for load balancers
r4f4 Oct 27, 2023
c4a04f4
infra/aws: create DNS resources
r4f4 Oct 28, 2023
593aa9b
infra/aws: add DNS unit tests
r4f4 Oct 28, 2023
fa61d2c
infra/aws: create security group resources
r4f4 Oct 30, 2023
402ef1f
infra/aws: add security group unit tests
r4f4 Oct 30, 2023
b5cdf6f
infra/aws: add instance creation functions
r4f4 Oct 30, 2023
cbd484d
infra/aws: add instance unit tests
r4f4 Oct 31, 2023
3b5a937
infra/aws: add instance profile unit tests
r4f4 Nov 5, 2023
5f02bda
infra/aws: create bootstrap resources
r4f4 Oct 30, 2023
b0b6d5a
infra/aws: create control plane resources
r4f4 Oct 31, 2023
d3f15ef
infra/aws: create compute resources
r4f4 Oct 31, 2023
8c2c5d5
destroy/aws: generalize functions for reuse
r4f4 Nov 2, 2023
4aa2b19
CORS-2833: infra/aws: implement bootstrap destroy
r4f4 Nov 2, 2023
4e3c1de
infra/aws: generate output from cluster provisioning
r4f4 Nov 3, 2023
9a20ef7
CORS-2834: infra/aws: implement bootstrap gather
r4f4 Nov 3, 2023
c8bbdaf
CORS-2878: infra/aws: support existing/shared VPC
r4f4 Nov 3, 2023
b66a588
CORS-2943: infra/aws: copy AMI when not in target region.
r4f4 Nov 3, 2023
8e6aed0
CORS-2944: infra/aws: support CNAME for gov cloud regions
r4f4 Nov 3, 2023
cba8f1d
aws: vendor changes
r4f4 Oct 24, 2023
f8a53e2
CORS-2887: infra/aws: preserve bootstrap ignition when specified
r4f4 Nov 5, 2023
914fb2f
CORS-2880: infra/aws: use user-supplied iam role
r4f4 Nov 5, 2023
258d5e5
CORS-2881: infra/aws: support AWS local zones
r4f4 Nov 6, 2023
651082b
infra/aws: wait for instance to acquire IP address
r4f4 Nov 6, 2023
4183eda
infra/aws: reuse code to associate a subnet with a route table
r4f4 Nov 6, 2023
53ffdb2
CORS-2886: infra/aws: phz and shared VPC support
r4f4 Nov 6, 2023
b75d9eb
infra/aws: use a boolean for public endpoints.
r4f4 Nov 6, 2023
6ed6982
infra/aws: fix public subnets only clusters
r4f4 Nov 8, 2023
393fc2e
infra: enable AWS SDK provider when building altinfra image
r4f4 Nov 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 67 additions & 73 deletions pkg/destroy/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"github.com/aws/aws-sdk-go/aws/endpoints"
"github.com/aws/aws-sdk-go/aws/request"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/efs"
"github.com/aws/aws-sdk-go/service/iam"
"github.com/aws/aws-sdk-go/service/resourcegroupstaggingapi"
Expand Down Expand Up @@ -158,12 +157,12 @@ func (o *ClusterUninstaller) RunWithContext(ctx context.Context) ([]string, erro
}

iamClient := iam.New(awsSession)
iamRoleSearch := &iamRoleSearch{
client: iamClient,
filters: o.Filters,
logger: o.Logger,
iamRoleSearch := &IamRoleSearch{
Client: iamClient,
Filters: o.Filters,
Logger: o.Logger,
}
iamUserSearch := &iamUserSearch{
iamUserSearch := &IamUserSearch{
client: iamClient,
filters: o.Filters,
logger: o.Logger,
Expand All @@ -180,45 +179,12 @@ func (o *ClusterUninstaller) RunWithContext(ctx context.Context) ([]string, erro
}
}

tracker := new(errorTracker)
tracker := new(ErrorTracker)

// Terminate EC2 instances. The instances need to be terminated first so that we can ensure that there is nothing
// running on the cluster creating new resources while we are attempting to delete resources, which could leak
// the new resources.
ec2Client := ec2.New(awsSession)
lastTerminateTime := time.Now()
err = wait.PollImmediateUntil(
time.Second*10,
func() (done bool, err error) {
instancesRunning, instancesNotTerminated, err := findEC2Instances(ctx, ec2Client, deleted, o.Filters, o.Logger)
if err != nil {
o.Logger.WithError(err).Info("error while finding EC2 instances to delete")
if err := ctx.Err(); err != nil {
return false, err
}
}
if len(instancesNotTerminated) == 0 && len(instancesRunning) == 0 && err == nil {
return true, nil
}
instancesToDelete := instancesRunning
if time.Since(lastTerminateTime) > 10*time.Minute {
instancesToDelete = instancesNotTerminated
lastTerminateTime = time.Now()
}
newlyDeleted, err := o.deleteResources(ctx, awsSession, instancesToDelete, tracker)
// Delete from the resources-to-delete set so that the current state of the resources to delete can be
// returned if the context is completed.
resourcesToDelete = resourcesToDelete.Difference(newlyDeleted)
deleted = deleted.Union(newlyDeleted)
if err != nil {
if err := ctx.Err(); err != nil {
return false, err
}
}
return false, nil
},
ctx.Done(),
)
err = DeleteEC2Instances(ctx, o.Logger, awsSession, o.Filters, resourcesToDelete, deleted, tracker)
if err != nil {
return resourcesToDelete.UnsortedList(), err
}
Expand All @@ -227,7 +193,7 @@ func (o *ClusterUninstaller) RunWithContext(ctx context.Context) ([]string, erro
err = wait.PollImmediateUntil(
time.Second*10,
func() (done bool, err error) {
newlyDeleted, loopError := o.deleteResources(ctx, awsSession, resourcesToDelete.UnsortedList(), tracker)
newlyDeleted, loopError := DeleteResources(ctx, o.Logger, awsSession, resourcesToDelete.UnsortedList(), tracker)
// Delete from the resources-to-delete set so that the current state of the resources to delete can be
// returned if the context is completed.
resourcesToDelete = resourcesToDelete.Difference(newlyDeleted)
Expand Down Expand Up @@ -292,14 +258,43 @@ func (o *ClusterUninstaller) findUntaggableResources(ctx context.Context, iamCli

// findResourcesToDelete returns the resources that should be deleted.
//
// tagClients - clients of the tagging API to use to search for resources.
// deleted - the resources that have already been deleted. Any resources specified in this set will be ignored.
// tagClients - clients of the tagging API to use to search for resources.
// deleted - the resources that have already been deleted. Any resources specified in this set will be ignored.
func (o *ClusterUninstaller) findResourcesToDelete(
ctx context.Context,
tagClients []*resourcegroupstaggingapi.ResourceGroupsTaggingAPI,
iamClient *iam.IAM,
iamRoleSearch *iamRoleSearch,
iamUserSearch *iamUserSearch,
iamRoleSearch *IamRoleSearch,
iamUserSearch *IamUserSearch,
deleted sets.Set[string],
) (sets.Set[string], []*resourcegroupstaggingapi.ResourceGroupsTaggingAPI, error) {
var errs []error
resources, tagClients, err := FindTaggedResourcesToDelete(ctx, o.Logger, tagClients, o.Filters, iamRoleSearch, iamUserSearch, deleted)
if err != nil {
errs = append(errs, err)
}

// Find untaggable resources
untaggableResources, err := o.findUntaggableResources(ctx, iamClient, deleted)
if err != nil {
errs = append(errs, err)
}
resources = resources.Union(untaggableResources)

return resources, tagClients, utilerrors.NewAggregate(errs)
}

// FindTaggedResourcesToDelete returns the tagged resources that should be deleted.
//
// tagClients - clients of the tagging API to use to search for resources.
// deleted - the resources that have already been deleted. Any resources specified in this set will be ignored.
func FindTaggedResourcesToDelete(
ctx context.Context,
logger logrus.FieldLogger,
tagClients []*resourcegroupstaggingapi.ResourceGroupsTaggingAPI,
filters []Filter,
iamRoleSearch *IamRoleSearch,
iamUserSearch *IamUserSearch,
deleted sets.Set[string],
) (sets.Set[string], []*resourcegroupstaggingapi.ResourceGroupsTaggingAPI, error) {
resources := sets.New[string]()
Expand All @@ -308,7 +303,7 @@ func (o *ClusterUninstaller) findResourcesToDelete(

// Find resources by tag
for _, tagClient := range tagClients {
resourcesInTagClient, err := o.findResourcesByTag(ctx, tagClient, deleted)
resourcesInTagClient, err := findResourcesByTag(ctx, logger, tagClient, filters, deleted)
if err != nil {
errs = append(errs, err)
}
Expand All @@ -318,30 +313,27 @@ func (o *ClusterUninstaller) findResourcesToDelete(
if len(resourcesInTagClient) > 0 || err != nil {
tagClientsWithResources = append(tagClientsWithResources, tagClient)
} else {
o.Logger.Debugf("no deletions from %s, removing client", *tagClient.Config.Region)
logger.Debugf("no deletions from %s, removing client", *tagClient.Config.Region)
}
}

// Find IAM roles
iamRoleResources, err := findIAMRoles(ctx, iamRoleSearch, deleted, o.Logger)
if err != nil {
errs = append(errs, err)
if iamRoleSearch != nil {
iamRoleResources, err := findIAMRoles(ctx, iamRoleSearch, deleted, logger)
if err != nil {
errs = append(errs, err)
}
resources = resources.Union(iamRoleResources)
}
resources = resources.Union(iamRoleResources)

// Find IAM users
iamUserResources, err := findIAMUsers(ctx, iamUserSearch, deleted, o.Logger)
if err != nil {
errs = append(errs, err)
}
resources = resources.Union(iamUserResources)

// Find untaggable resources
untaggableResources, err := o.findUntaggableResources(ctx, iamClient, deleted)
if err != nil {
errs = append(errs, err)
if iamUserSearch != nil {
iamUserResources, err := findIAMUsers(ctx, iamUserSearch, deleted, logger)
if err != nil {
errs = append(errs, err)
}
resources = resources.Union(iamUserResources)
}
resources = resources.Union(untaggableResources)

return resources, tagClientsWithResources, utilerrors.NewAggregate(errs)
}
Expand All @@ -350,14 +342,16 @@ func (o *ClusterUninstaller) findResourcesToDelete(
//
// tagClients - clients of the tagging API to use to search for resources.
// deleted - the resources that have already been deleted. Any resources specified in this set will be ignored.
func (o *ClusterUninstaller) findResourcesByTag(
func findResourcesByTag(
ctx context.Context,
logger logrus.FieldLogger,
tagClient *resourcegroupstaggingapi.ResourceGroupsTaggingAPI,
filters []Filter,
deleted sets.Set[string],
) (sets.Set[string], error) {
resources := sets.New[string]()
for _, filter := range o.Filters {
o.Logger.Debugf("search for matching resources by tag in %s matching %#+v", *tagClient.Config.Region, filter)
for _, filter := range filters {
logger.Debugf("search for matching resources by tag in %s matching %#+v", *tagClient.Config.Region, filter)
tagFilters := make([]*resourcegroupstaggingapi.TagFilter, 0, len(filter))
for key, value := range filter {
tagFilters = append(tagFilters, &resourcegroupstaggingapi.TagFilter{
Expand All @@ -380,29 +374,29 @@ func (o *ClusterUninstaller) findResourcesByTag(
)
if err != nil {
err = errors.Wrap(err, "get tagged resources")
o.Logger.Info(err)
logger.Info(err)
return resources, err
}
}
return resources, nil
}

// deleteResources deletes the specified resources.
// DeleteResources deletes the specified resources.
//
// resources - the resources to be deleted.
//
// The first return is the ARNs of the resources that were successfully deleted
func (o *ClusterUninstaller) deleteResources(ctx context.Context, awsSession *session.Session, resources []string, tracker *errorTracker) (sets.Set[string], error) {
func DeleteResources(ctx context.Context, logger logrus.FieldLogger, awsSession *session.Session, resources []string, tracker *ErrorTracker) (sets.Set[string], error) {
deleted := sets.New[string]()
for _, arnString := range resources {
logger := o.Logger.WithField("arn", arnString)
l := logger.WithField("arn", arnString)
parsedARN, err := arn.Parse(arnString)
if err != nil {
logger.WithError(err).Debug("could not parse ARN")
l.WithError(err).Debug("could not parse ARN")
continue
}
if err := deleteARN(ctx, awsSession, parsedARN, o.Logger); err != nil {
tracker.suppressWarning(arnString, err, logger)
if err := deleteARN(ctx, awsSession, parsedARN, logger); err != nil {
tracker.suppressWarning(arnString, err, l)
if err := ctx.Err(); err != nil {
return deleted, err
}
Expand Down
38 changes: 38 additions & 0 deletions pkg/destroy/aws/ec2helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package aws
import (
"context"
"fmt"
"time"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/arn"
Expand All @@ -16,6 +17,7 @@ import (
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
)

// findEC2Instances returns the EC2 instances with tags that satisfy the filters.
Expand Down Expand Up @@ -85,6 +87,42 @@ func findEC2Instances(ctx context.Context, ec2Client *ec2.EC2, deleted sets.Set[
return resourcesRunning, resourcesNotTerminated, nil
}

// DeleteEC2Instances terminates the EC2 instances matching filters and blocks
// until findEC2Instances reports none running and none awaiting termination.
//
// The search-and-delete cycle runs immediately and then every 10 seconds until
// it succeeds or ctx is cancelled. Errors from finding or deleting instances
// are logged and retried on the next cycle rather than aborting the wait.
//
// Parameters:
//   - toDelete: the caller's set of resources still to be deleted. ARNs of
//     terminated instances are removed from it in place.
//   - deleted: the caller's set of resources already deleted. ARNs of
//     terminated instances are added to it in place. It is also passed to
//     findEC2Instances on every cycle.
//   - tracker: forwarded to DeleteResources.
//
// The returned error is whatever wait.PollUntilContextCancel returns, which is
// non-nil when ctx is cancelled before all instances are gone.
func DeleteEC2Instances(ctx context.Context, logger logrus.FieldLogger, awsSession *session.Session, filters []Filter, toDelete sets.Set[string], deleted sets.Set[string], tracker *ErrorTracker) error {
	// Inserting into a nil set panics. Fall back to a private set so a caller
	// that does not track deletions keeps working.
	if deleted == nil {
		deleted = sets.New[string]()
	}

	ec2Client := ec2.New(awsSession)
	lastTerminateTime := time.Now()
	return wait.PollUntilContextCancel(
		ctx,
		time.Second*10,
		true,
		func(ctx context.Context) (bool, error) {
			instancesRunning, instancesNotTerminated, err := findEC2Instances(ctx, ec2Client, deleted, filters, logger)
			if err != nil {
				logger.WithError(err).Info("error while finding EC2 instances to delete")
				return false, nil
			}
			if len(instancesNotTerminated) == 0 && len(instancesRunning) == 0 {
				return true, nil
			}

			// Normally only running instances are asked to terminate. Once
			// every 10 minutes the request is re-issued for every instance
			// that has not reached the terminated state.
			instancesToDelete := instancesRunning
			if time.Since(lastTerminateTime) > 10*time.Minute {
				instancesToDelete = instancesNotTerminated
				lastTerminateTime = time.Now()
			}

			newlyDeleted, err := DeleteResources(ctx, logger, awsSession, instancesToDelete, tracker)
			// Update the caller's sets in place so that the current state of
			// the resources to delete can be returned if the context is
			// completed. Difference/Union return new sets, so assigning their
			// results to the parameters would never reach the caller.
			for resource := range newlyDeleted {
				toDelete.Delete(resource)
				deleted.Insert(resource)
			}
			if err != nil {
				logger.WithError(err).Info("error while deleting EC2 instances")
			}
			return false, nil
		},
	)
}

func deleteEC2(ctx context.Context, session *session.Session, arn arn.ARN, logger logrus.FieldLogger) error {
client := ec2.New(session)

Expand Down
6 changes: 3 additions & 3 deletions pkg/destroy/aws/errortracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ const (
suppressDuration = time.Minute * 5
)

// errorTracker holds a history of errors
type errorTracker struct {
// ErrorTracker holds a history of errors.
type ErrorTracker struct {
history map[string]time.Time
}

// suppressWarning logs errors WARN once every duration and the rest to DEBUG
func (o *errorTracker) suppressWarning(identifier string, err error, logger logrus.FieldLogger) {
func (o *ErrorTracker) suppressWarning(identifier string, err error, logger logrus.FieldLogger) {
if o.history == nil {
o.history = map[string]time.Time{}
}
Expand Down
Loading