From 673377e280759f824d15ec84546cdd25252dfe6c Mon Sep 17 00:00:00 2001 From: staebler Date: Tue, 13 Oct 2020 17:39:57 -0400 Subject: [PATCH 1/2] vendor: bump installer version --- go.mod | 2 + go.sum | 4 +- .../installer/pkg/destroy/aws/aws.go | 541 +++++++++++------- .../installer/pkg/types/openstack/platform.go | 8 +- vendor/modules.txt | 3 +- 5 files changed, 333 insertions(+), 225 deletions(-) diff --git a/go.mod b/go.mod index 4581a5e489c..36c6b784e4d 100644 --- a/go.mod +++ b/go.mod @@ -90,3 +90,5 @@ replace github.com/openshift/library-go => github.com/openshift/library-go v0.0. // temporary hack fix for https://github.com/kubernetes/kubernetes/issues/95300 replace k8s.io/apiserver => github.com/staebler/apiserver v0.19.1-0.20201005174924-a3ef0d1e45df + +replace github.com/openshift/installer => github.com/staebler/installer v0.9.0-master.0.20201010200101-5a09a63bb753 diff --git a/go.sum b/go.sum index cc54f193ed0..8019ac9f6e5 100644 --- a/go.sum +++ b/go.sum @@ -1317,8 +1317,6 @@ github.com/openshift/cluster-autoscaler-operator v0.0.0-20190521201101-62768a6ba github.com/openshift/cluster-version-operator v3.11.1-0.20190629164025-08cac1c02538+incompatible/go.mod h1:0BbpR1mrN0F2ZRae5N1XHcytmkvVPaeKgSQwRRBWugc= github.com/openshift/generic-admission-server v1.14.1-0.20200903115324-4ddcdd976480 h1:y47BAJFepK8Xls1c+quIOyc46OXiT9LRiqGVjIaMlSA= github.com/openshift/generic-admission-server v1.14.1-0.20200903115324-4ddcdd976480/go.mod h1:OAHL5WnZphlhVEf5fTdeGLvNwMu1B2zCWpmxJpCA35o= -github.com/openshift/installer v0.9.0-master.0.20201006081509-887875ea9cf9 h1:sW+qISOi7iHyXrAdl/lUANiCc4CNrYqjTmyshUyZKMs= -github.com/openshift/installer v0.9.0-master.0.20201006081509-887875ea9cf9/go.mod h1:gj5hubG057ciWCCBUq2+BK29ELoySn5H1vSP842n/uU= github.com/openshift/library-go v0.0.0-20200918101923-1e4c94603efe h1:MJqGN0NVONnTLDYPVIEH4uo6i3gAK7LAkhLnyFO8Je0= github.com/openshift/library-go v0.0.0-20200918101923-1e4c94603efe/go.mod h1:NI6xOQGuTnLXeHW8Z2glKSFhF7X+YxlAlqlBMaK0zEM= github.com/openshift/machine-api-operator v0.0.0-20190312153711-9650e16c9880/go.mod h1:7HeAh0v04zQn1L+4ItUjvpBQYsm2Nf81WaZLiXTcnkc= @@ -1589,6 +1587,8 @@ github.com/spf13/viper v1.6.1 h1:VPZzIkznI1YhVMRi6vNFLHSwhnhReBfgTxIPccpfdZk= github.com/spf13/viper v1.6.1/go.mod h1:t3iDnF5Jlj76alVNuyFBk5oUMCvsrkbvZK0WQdfDi5k= github.com/staebler/apiserver v0.19.1-0.20201005174924-a3ef0d1e45df h1:tOLmJyPkBiNtX5vm5bMRIsCwjEljkNeeoB0IBi5V6VU= github.com/staebler/apiserver v0.19.1-0.20201005174924-a3ef0d1e45df/go.mod h1:XvzqavYj73931x7FLtyagh8WibHpePJ1QwWrSJs2CLk= +github.com/staebler/installer v0.9.0-master.0.20201010200101-5a09a63bb753 h1:qG3ZoJqMVTCf22CCvxtEGrQ/SrwxuIzzMrrcEQBCFZE= +github.com/staebler/installer v0.9.0-master.0.20201010200101-5a09a63bb753/go.mod h1:gj5hubG057ciWCCBUq2+BK29ELoySn5H1vSP842n/uU= github.com/stoewer/go-strcase v1.0.2/go.mod h1:eLfe5bL3qbL7ep/KafHzthxejrOF5J3xmt03uL5tzek= github.com/stoewer/go-strcase v1.1.0/go.mod h1:G7YglbHPK5jX3JcWljxVXRXPh90/dtxfy6xWqxu5b90= github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= diff --git a/vendor/github.com/openshift/installer/pkg/destroy/aws/aws.go b/vendor/github.com/openshift/installer/pkg/destroy/aws/aws.go index edcd73eda03..ff835dd6e55 100644 --- a/vendor/github.com/openshift/installer/pkg/destroy/aws/aws.go +++ b/vendor/github.com/openshift/installer/pkg/destroy/aws/aws.go @@ -22,6 +22,8 @@ import ( "github.com/aws/aws-sdk-go/service/s3/s3manager" "github.com/pkg/errors" "github.com/sirupsen/logrus" + 
utilerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" awssession "github.com/openshift/installer/pkg/asset/installconfig/aws" @@ -101,15 +103,16 @@ func (o *ClusterUninstaller) validate() error { // Run is the entrypoint to start the uninstall process func (o *ClusterUninstaller) Run() error { - return o.RunWithContext(context.Background()) + _, err := o.RunWithContext(context.Background()) + return err } // RunWithContext runs the uninstall process with a context. // The first return is the list of ARNs for resources that could not be destroyed. -func (o *ClusterUninstaller) RunWithContext(ctx context.Context) error { +func (o *ClusterUninstaller) RunWithContext(ctx context.Context) ([]string, error) { err := o.validate() if err != nil { - return err + return nil, err } awsSession := o.Session @@ -117,7 +120,7 @@ func (o *ClusterUninstaller) RunWithContext(ctx context.Context) error { // Relying on appropriate AWS ENV vars (eg AWS_PROFILE, AWS_ACCESS_KEY_ID, etc) awsSession, err = session.NewSession(aws.NewConfig().WithRegion(o.Region)) if err != nil { - return err + return nil, err } } awsSession.Handlers.Build.PushBackNamed(request.NamedHandler{ @@ -128,28 +131,22 @@ func (o *ClusterUninstaller) RunWithContext(ctx context.Context) error { tagClients := []*resourcegroupstaggingapi.ResourceGroupsTaggingAPI{ resourcegroupstaggingapi.New(awsSession), } - tagClientNames := map[*resourcegroupstaggingapi.ResourceGroupsTaggingAPI]string{ - tagClients[0]: o.Region, - } switch o.Region { case endpoints.CnNorth1RegionID, endpoints.CnNorthwest1RegionID: if o.Region != endpoints.CnNorthwest1RegionID { - tagClient := resourcegroupstaggingapi.New(awsSession, aws.NewConfig().WithRegion(endpoints.CnNorthwest1RegionID)) - tagClients = append(tagClients, tagClient) - tagClientNames[tagClient] = endpoints.CnNorthwest1RegionID + tagClients = append(tagClients, + resourcegroupstaggingapi.New(awsSession, aws.NewConfig().WithRegion(endpoints.CnNorthwest1RegionID))) } case endpoints.UsGovEast1RegionID, endpoints.UsGovWest1RegionID: if o.Region != endpoints.UsGovWest1RegionID { - tagClient := resourcegroupstaggingapi.New(awsSession, aws.NewConfig().WithRegion(endpoints.UsGovWest1RegionID)) - tagClients = append(tagClients, tagClient) - tagClientNames[tagClient] = endpoints.UsGovWest1RegionID + tagClients = append(tagClients, + resourcegroupstaggingapi.New(awsSession, aws.NewConfig().WithRegion(endpoints.UsGovWest1RegionID))) } default: if o.Region != endpoints.UsEast1RegionID { - tagClient := resourcegroupstaggingapi.New(awsSession, aws.NewConfig().WithRegion(endpoints.UsEast1RegionID)) - tagClients = append(tagClients, tagClient) - tagClientNames[tagClient] = endpoints.UsEast1RegionID + tagClients = append(tagClients, + resourcegroupstaggingapi.New(awsSession, aws.NewConfig().WithRegion(endpoints.UsEast1RegionID))) } } @@ -165,132 +162,322 @@ func (o *ClusterUninstaller) RunWithContext(ctx context.Context) error { logger: o.Logger, } - deleted, err := terminateEC2InstancesByTags(ctx, ec2.New(awsSession), iamClient, o.Filters, o.Logger) + // Get the initial resources to delete, so that they can be returned if the context is canceled while terminating + // instances. 
+ deleted := sets.NewString() + resourcesToDelete, tagClientsWithResources, err := o.findResourcesToDelete(ctx, tagClients, iamClient, iamRoleSearch, iamUserSearch, deleted) if err != nil { - return err + o.Logger.WithError(err).Info("error while finding resources to delete") + if err := ctx.Err(); err != nil { + return resourcesToDelete.UnsortedList(), err + } } tracker := new(errorTracker) - tagClientStack := append([]*resourcegroupstaggingapi.ResourceGroupsTaggingAPI(nil), tagClients...) + + // Terminate EC2 instances. + ec2Client := ec2.New(awsSession) err = wait.PollImmediateUntil( time.Second*10, func() (done bool, err error) { - var loopError error - nextTagClients := tagClients[:0] - for _, tagClient := range tagClientStack { - matched := false - for _, filter := range o.Filters { - o.Logger.Debugf("search for and delete matching resources by tag in %s matching %#+v", tagClientNames[tagClient], filter) - tagFilters := make([]*resourcegroupstaggingapi.TagFilter, 0, len(filter)) - for key, value := range filter { - tagFilters = append(tagFilters, &resourcegroupstaggingapi.TagFilter{ - Key: aws.String(key), - Values: []*string{aws.String(value)}, - }) - } - err = tagClient.GetResourcesPagesWithContext( - ctx, - &resourcegroupstaggingapi.GetResourcesInput{TagFilters: tagFilters}, - func(results *resourcegroupstaggingapi.GetResourcesOutput, lastPage bool) bool { - for _, resource := range results.ResourceTagMappingList { - arnString := *resource.ResourceARN - if _, ok := deleted[arnString]; !ok { - arnLogger := o.Logger.WithField("arn", arnString) - matched = true - parsed, err := arn.Parse(arnString) - if err != nil { - arnLogger.Debug(err) - continue - } - - err = deleteARN(ctx, awsSession, parsed, filter, arnLogger) - if err != nil { - tracker.suppressWarning(arnString, err, arnLogger) - err = errors.Wrapf(err, "deleting %s", arnString) - continue - } - deleted[arnString] = exists - } - } - - return !lastPage - }, - ) - if err != nil { - err = errors.Wrap(err, "get tagged resources") - o.Logger.Info(err) - matched = true - loopError = err - } - } - - if matched { - nextTagClients = append(nextTagClients, tagClient) - } else { - o.Logger.Debugf("no deletions from %s, removing client", tagClientNames[tagClient]) + instancesToDelete, err := o.findEC2Instances(ctx, ec2Client, deleted) + if err != nil { + o.Logger.WithError(err).Info("error while finding EC2 instances to delete") + if err := ctx.Err(); err != nil { + return false, err } } - tagClientStack = nextTagClients - - o.Logger.Debug("search for IAM roles") - arns, err := iamRoleSearch.arns(ctx) + if len(instancesToDelete) == 0 && err == nil { + return true, nil + } + newlyDeleted, err := o.deleteResources(ctx, awsSession, instancesToDelete, tracker) + // Delete from the resources-to-delete set so that the current state of the resources to delete can be + // returned if the context is completed. + resourcesToDelete = resourcesToDelete.Difference(newlyDeleted) + deleted = deleted.Union(newlyDeleted) if err != nil { - o.Logger.Info(err) - loopError = err + if err := ctx.Err(); err != nil { + return false, err + } } + return false, nil + }, + ctx.Done(), + ) + if err != nil { + return resourcesToDelete.UnsortedList(), err + } - o.Logger.Debug("search for IAM users") - userARNs, err := iamUserSearch.arns(ctx) + // Delete the rest of the resources. 
+ err = wait.PollImmediateUntil( + time.Second*10, + func() (done bool, err error) { + newlyDeleted, loopError := o.deleteResources(ctx, awsSession, resourcesToDelete.UnsortedList(), tracker) + // Delete from the resources-to-delete set so that the current state of the resources to delete can be + // returned if the context is completed. + resourcesToDelete = resourcesToDelete.Difference(newlyDeleted) + deleted = deleted.Union(newlyDeleted) + if loopError != nil { + if err := ctx.Err(); err != nil { + return false, err + } + } + // Store resources to delete in a temporary variable so that, in case the context is cancelled, the current + // resources to delete are not lost. + nextResourcesToDelete, nextTagClients, err := o.findResourcesToDelete(ctx, tagClientsWithResources, iamClient, iamRoleSearch, iamUserSearch, deleted) if err != nil { - o.Logger.Info(err) - loopError = err + o.Logger.WithError(err).Info("error while finding resources to delete") + if err := ctx.Err(); err != nil { + return false, err + } + loopError = errors.Wrap(err, "error while finding resources to delete") } - arns = append(arns, userARNs...) + resourcesToDelete = nextResourcesToDelete + tagClientsWithResources = nextTagClients + return len(resourcesToDelete) == 0 && loopError == nil, nil + }, + ctx.Done(), + ) + if err != nil { + return resourcesToDelete.UnsortedList(), err + } - if len(arns) > 0 { - o.Logger.Debug("delete IAM roles and users") - } - for _, arnString := range arns { - if _, ok := deleted[arnString]; !ok { - arnLogger := o.Logger.WithField("arn", arnString) - parsed, err := arn.Parse(arnString) - if err != nil { - arnLogger.Debug(err) - loopError = err + err = removeSharedTags(ctx, tagClients, o.Filters, o.Logger) + if err != nil { + return nil, err + } + + return nil, nil +} + +// findEC2Instances returns the EC2 instances with tags that satisfy the filters. +// deleted - the resources that have already been deleted. Any resources specified in this set will be ignored. 
+func (o *ClusterUninstaller) findEC2Instances(ctx context.Context, ec2Client *ec2.EC2, deleted sets.String) ([]string, error) { + if ec2Client.Config.Region == nil { + return nil, errors.New("EC2 client does not have region configured") + } + + partition, ok := endpoints.PartitionForRegion(endpoints.DefaultPartitions(), *ec2Client.Config.Region) + if !ok { + return nil, errors.Errorf("no partition found for region %q", *ec2Client.Config.Region) + } + + var resources []string + for _, filter := range o.Filters { + o.Logger.Debugf("search for instances by tag matching %#+v", filter) + instanceFilters := make([]*ec2.Filter, 0, len(filter)) + for key, value := range filter { + instanceFilters = append(instanceFilters, &ec2.Filter{ + Name: aws.String("tag:" + key), + Values: []*string{aws.String(value)}, + }) + } + err := ec2Client.DescribeInstancesPagesWithContext( + ctx, + &ec2.DescribeInstancesInput{Filters: instanceFilters}, + func(results *ec2.DescribeInstancesOutput, lastPage bool) bool { + for _, reservation := range results.Reservations { + if reservation.OwnerId == nil { continue } - err = deleteARN(ctx, awsSession, parsed, nil, arnLogger) - if err != nil { - tracker.suppressWarning(arnString, err, arnLogger) - err = errors.Wrapf(err, "deleting %s", arnString) - loopError = err - continue + for _, instance := range reservation.Instances { + if instance.InstanceId == nil || instance.State == nil { + continue + } + + instanceLogger := o.Logger.WithField("instance", *instance.InstanceId) + arn := fmt.Sprintf("arn:%s:ec2:%s:%s:instance/%s", partition.ID(), *ec2Client.Config.Region, *reservation.OwnerId, *instance.InstanceId) + if *instance.State.Name == "terminated" { + if !deleted.Has(arn) { + instanceLogger.Info("Terminated") + deleted.Insert(arn) + } + } else { + resources = append(resources, arn) + } } - deleted[arnString] = exists } - } + return !lastPage + }, + ) + if err != nil { + err = errors.Wrap(err, "get ec2 instances") + o.Logger.Info(err) + return resources, err + } + } + return resources, nil +} - return len(tagClientStack) == 0 && loopError == nil, nil - }, - ctx.Done(), - ) +// findResourcesToDelete returns the resources that should be deleted. +// tagClients - clients of the tagging API to use to search for resources. +// deleted - the resources that have already been deleted. Any resources specified in this set will be ignored. +func (o *ClusterUninstaller) findResourcesToDelete( + ctx context.Context, + tagClients []*resourcegroupstaggingapi.ResourceGroupsTaggingAPI, + iamClient *iam.IAM, + iamRoleSearch *iamRoleSearch, + iamUserSearch *iamUserSearch, + deleted sets.String, +) (sets.String, []*resourcegroupstaggingapi.ResourceGroupsTaggingAPI, error) { + resources := sets.NewString() + var tagClientsWithResources []*resourcegroupstaggingapi.ResourceGroupsTaggingAPI + var errs []error + + // Find resources by tag + for _, tagClient := range tagClients { + resourcesInTagClient, err := o.findResourcesByTag(ctx, tagClient, deleted) + if err != nil { + errs = append(errs, err) + } + o.Logger.Debugf("found %d resources in %s", len(resourcesInTagClient), *tagClient.Config.Region) + resources = resources.Union(resourcesInTagClient) + // If there are still resources to be deleted for the tag client or if there was an error getting the resources + // for the tag client, then retain the tag client for future queries. 
+ if len(resourcesInTagClient) > 0 || err != nil { + tagClientsWithResources = append(tagClientsWithResources, tagClient) + } else { + o.Logger.Debugf("no deletions from %s, removing client", *tagClient.Config.Region) + } + } + + // Find IAM roles + iamRoleResources, err := o.findIAMRoles(ctx, iamRoleSearch, deleted) if err != nil { - return err + errs = append(errs, err) } + o.Logger.Debugf("found %d IAM role resources", len(iamRoleResources)) + resources = resources.Union(iamRoleResources) - o.Logger.Debug("search for untaggable resources") - if err := o.deleteUntaggedResources(ctx, awsSession); err != nil { - o.Logger.Debug(err) - return err + // Find IAM users + iamUserResources, err := o.findIAMUsers(ctx, iamUserSearch, deleted) + if err != nil { + errs = append(errs, err) } + o.Logger.Debugf("found %d IAM user resources", len(iamUserResources)) + resources = resources.Union(iamUserResources) - err = removeSharedTags(ctx, tagClients, tagClientNames, o.Filters, o.Logger) + // Find untaggable resources + untaggableResources, err := o.findUntaggableResources(ctx, iamClient, deleted) if err != nil { - return err + errs = append(errs, err) } + o.Logger.Debugf("found %d untaggable resources", len(untaggableResources)) + resources = resources.Union(untaggableResources) - return nil + o.Logger.Debugf("found %d total resources", len(resources)) + return resources, tagClientsWithResources, utilerrors.NewAggregate(errs) +} + +// findResourcesByTag returns the resources with tags that satisfy the filters. +// tagClients - clients of the tagging API to use to search for resources. +// deleted - the resources that have already been deleted. Any resources specified in this set will be ignored. +func (o *ClusterUninstaller) findResourcesByTag( + ctx context.Context, + tagClient *resourcegroupstaggingapi.ResourceGroupsTaggingAPI, + deleted sets.String, +) (sets.String, error) { + resources := sets.NewString() + for _, filter := range o.Filters { + o.Logger.Debugf("search for matching resources by tag in %s matching %#+v", *tagClient.Config.Region, filter) + tagFilters := make([]*resourcegroupstaggingapi.TagFilter, 0, len(filter)) + for key, value := range filter { + tagFilters = append(tagFilters, &resourcegroupstaggingapi.TagFilter{ + Key: aws.String(key), + Values: []*string{aws.String(value)}, + }) + } + err := tagClient.GetResourcesPagesWithContext( + ctx, + &resourcegroupstaggingapi.GetResourcesInput{TagFilters: tagFilters}, + func(results *resourcegroupstaggingapi.GetResourcesOutput, lastPage bool) bool { + for _, resource := range results.ResourceTagMappingList { + arnString := *resource.ResourceARN + if !deleted.Has(arnString) { + resources.Insert(arnString) + } + } + return !lastPage + }, + ) + if err != nil { + err = errors.Wrap(err, "get tagged resources") + o.Logger.Info(err) + return resources, err + } + } + return resources, nil +} + +// findIAMRoles returns the IAM roles for the cluster. +// deleted - the resources that have already been deleted. Any resources specified in this set will be ignored. +func (o *ClusterUninstaller) findIAMRoles(ctx context.Context, search *iamRoleSearch, deleted sets.String) (sets.String, error) { + o.Logger.Debug("search for IAM roles") + resources, err := search.arns(ctx) + if err != nil { + o.Logger.Info(err) + return nil, err + } + return sets.NewString(resources...).Difference(deleted), nil +} + +// findIAMUsers returns the IAM users for the cluster. +// deleted - the resources that have already been deleted. 
Any resources specified in this set will be ignored. +func (o *ClusterUninstaller) findIAMUsers(ctx context.Context, search *iamUserSearch, deleted sets.String) (sets.String, error) { + o.Logger.Debug("search for IAM users") + resources, err := search.arns(ctx) + if err != nil { + o.Logger.Info(err) + return nil, err + } + return sets.NewString(resources...).Difference(deleted), nil +} + +// findUntaggableResources returns the resources for the cluster that cannot be tagged. +// deleted - the resources that have already been deleted. Any resources specified in this set will be ignored. +func (o *ClusterUninstaller) findUntaggableResources(ctx context.Context, iamClient *iam.IAM, deleted sets.String) (sets.String, error) { + resources := sets.NewString() + o.Logger.Debug("search for IAM instance profiles") + for _, profileType := range []string{"master", "worker"} { + profile := fmt.Sprintf("%s-%s-profile", o.ClusterID, profileType) + response, err := iamClient.GetInstanceProfileWithContext(ctx, &iam.GetInstanceProfileInput{InstanceProfileName: &profile}) + if err != nil { + if err.(awserr.Error).Code() == iam.ErrCodeNoSuchEntityException { + continue + } + return resources, errors.Wrap(err, "failed to get IAM instance profile") + } + arnString := *response.InstanceProfile.Arn + if !deleted.Has(arnString) { + resources.Insert(arnString) + } + } + return resources, nil +} + +// deleteResources deletes the specified resources. +// resources - the resources to be deleted. +// The first return is the ARNs of the resources that were successfully deleted +func (o *ClusterUninstaller) deleteResources(ctx context.Context, awsSession *session.Session, resources []string, tracker *errorTracker) (sets.String, error) { + deleted := sets.NewString() + for _, arnString := range resources { + logger := o.Logger.WithField("arn", arnString) + parsedARN, err := arn.Parse(arnString) + if err != nil { + logger.WithError(err).Debug("could not parse ARN") + continue + } + if err := deleteARN(ctx, awsSession, parsedARN, o.Logger); err != nil { + tracker.suppressWarning(arnString, err, logger) + if err := ctx.Err(); err != nil { + return deleted, err + } + continue + } + deleted.Insert(arnString) + } + return deleted, nil } func splitSlash(name string, input string) (base string, suffix string, err error) { @@ -322,17 +509,6 @@ func tagMatch(filters []Filter, tags map[string]string) bool { return len(filters) == 0 } -func tagsForFilter(filter Filter) []*ec2.Tag { - tags := make([]*ec2.Tag, 0, len(filter)) - for key, value := range filter { - tags = append(tags, &ec2.Tag{ - Key: aws.String(key), - Value: aws.String(value), - }) - } - return tags -} - type iamRoleSearch struct { client *iam.IAM filters []Filter @@ -531,10 +707,10 @@ func findPublicRoute53(ctx context.Context, client *route53.Route53, dnsName str return "", nil } -func deleteARN(ctx context.Context, session *session.Session, arn arn.ARN, filter Filter, logger logrus.FieldLogger) error { +func deleteARN(ctx context.Context, session *session.Session, arn arn.ARN, logger logrus.FieldLogger) error { switch arn.Service { case "ec2": - return deleteEC2(ctx, session, arn, filter, logger) + return deleteEC2(ctx, session, arn, logger) case "elasticloadbalancing": return deleteElasticLoadBalancing(ctx, session, arn, logger) case "iam": @@ -548,7 +724,7 @@ func deleteARN(ctx context.Context, session *session.Session, arn arn.ARN, filte } } -func deleteEC2(ctx context.Context, session *session.Session, arn arn.ARN, filter Filter, logger logrus.FieldLogger) error 
{ +func deleteEC2(ctx context.Context, session *session.Session, arn arn.ARN, logger logrus.FieldLogger) error { client := ec2.New(session) resourceType, id, err := splitSlash("resource", arn.Resource) @@ -563,7 +739,7 @@ func deleteEC2(ctx context.Context, session *session.Session, arn arn.ARN, filte case "elastic-ip": return deleteEC2ElasticIP(ctx, client, id, logger) case "image": - return deleteEC2Image(ctx, client, id, filter, logger) + return deleteEC2Image(ctx, client, id, logger) case "instance": return terminateEC2Instance(ctx, client, iam.New(session), id, logger) case "internet-gateway": @@ -608,7 +784,7 @@ func deleteEC2DHCPOptions(ctx context.Context, client *ec2.EC2, id string, logge return nil } -func deleteEC2Image(ctx context.Context, client *ec2.EC2, id string, filter Filter, logger logrus.FieldLogger) error { +func deleteEC2Image(ctx context.Context, client *ec2.EC2, id string, logger logrus.FieldLogger) error { // tag the snapshots used by the AMI so that the snapshots are matched // by the filter and deleted response, err := client.DescribeImagesWithContext(ctx, &ec2.DescribeImagesInput{ @@ -620,20 +796,22 @@ func deleteEC2Image(ctx context.Context, client *ec2.EC2, id string, filter Filt } return err } - snapshots := []*string{} for _, image := range response.Images { + var snapshots []*string for _, bdm := range image.BlockDeviceMappings { if bdm.Ebs != nil && bdm.Ebs.SnapshotId != nil { snapshots = append(snapshots, bdm.Ebs.SnapshotId) } } - } - _, err = client.CreateTagsWithContext(ctx, &ec2.CreateTagsInput{ - Resources: snapshots, - Tags: tagsForFilter(filter), - }) - if err != nil { - err = errors.Wrapf(err, "tagging snapshots for %s", id) + if len(snapshots) != 0 { + _, err = client.CreateTagsWithContext(ctx, &ec2.CreateTagsInput{ + Resources: snapshots, + Tags: image.Tags, + }) + if err != nil { + err = errors.Wrapf(err, "tagging snapshots for %s", id) + } + } } _, err = client.DeregisterImageWithContext(ctx, &ec2.DeregisterImageInput{ @@ -716,85 +894,6 @@ func terminateEC2InstanceByInstance(ctx context.Context, ec2Client *ec2.EC2, iam return nil } -// terminateEC2InstancesByTags loops until there all instances which -// match the given tags are terminated. 
-func terminateEC2InstancesByTags(ctx context.Context, ec2Client *ec2.EC2, iamClient *iam.IAM, filters []Filter, logger logrus.FieldLogger) (map[string]struct{}, error) { - if ec2Client.Config.Region == nil { - return nil, errors.New("EC2 client does not have region configured") - } - - partition, ok := endpoints.PartitionForRegion(endpoints.DefaultPartitions(), *ec2Client.Config.Region) - if !ok { - return nil, errors.Errorf("no partition found for region %q", *ec2Client.Config.Region) - } - - terminated := map[string]struct{}{} - err := wait.PollImmediateUntil( - time.Second*10, - func() (done bool, err error) { - var loopError error - matched := false - for _, filter := range filters { - logger.Debugf("search for and delete matching instances by tag matching %#+v", filter) - instanceFilters := make([]*ec2.Filter, 0, len(filter)) - for key, value := range filter { - instanceFilters = append(instanceFilters, &ec2.Filter{ - Name: aws.String("tag:" + key), - Values: []*string{aws.String(value)}, - }) - } - err = ec2Client.DescribeInstancesPagesWithContext( - ctx, - &ec2.DescribeInstancesInput{Filters: instanceFilters}, - func(results *ec2.DescribeInstancesOutput, lastPage bool) bool { - for _, reservation := range results.Reservations { - if reservation.OwnerId == nil { - continue - } - - for _, instance := range reservation.Instances { - if instance.InstanceId == nil || instance.State == nil { - continue - } - - instanceLogger := logger.WithField("instance", *instance.InstanceId) - if *instance.State.Name == "terminated" { - arn := fmt.Sprintf("arn:%s:ec2:%s:%s:instance/%s", partition.ID(), *ec2Client.Config.Region, *reservation.OwnerId, *instance.InstanceId) - if _, ok := terminated[arn]; !ok { - instanceLogger.Info("Terminated") - terminated[arn] = exists - } - continue - } - - matched = true - err := terminateEC2InstanceByInstance(ctx, ec2Client, iamClient, instance, instanceLogger) - if err != nil { - instanceLogger.Debug(err) - loopError = errors.Wrapf(err, "terminating %s", *instance.InstanceId) - continue - } - } - } - - return !lastPage - }, - ) - if err != nil { - err = errors.Wrap(err, "describe instances") - logger.Info(err) - matched = true - loopError = err - } - } - - return !matched && loopError == nil, nil - }, - ctx.Done(), - ) - return terminated, err -} - // This is a bit of hack. Some objects, like Instance Profiles, can not be tagged in AWS. // We "normally" find those objects by their relation to other objects. 
We have found, // however, that people regularly delete all of their instances and roles outside of @@ -1973,12 +2072,12 @@ func isBucketNotFound(err interface{}) bool { return false } -func removeSharedTags(ctx context.Context, tagClients []*resourcegroupstaggingapi.ResourceGroupsTaggingAPI, tagClientNames map[*resourcegroupstaggingapi.ResourceGroupsTaggingAPI]string, filters []Filter, logger logrus.FieldLogger) error { +func removeSharedTags(ctx context.Context, tagClients []*resourcegroupstaggingapi.ResourceGroupsTaggingAPI, filters []Filter, logger logrus.FieldLogger) error { for _, filter := range filters { for key, value := range filter { if strings.HasPrefix(key, "kubernetes.io/cluster/") { if value == "owned" { - if err := removeSharedTag(ctx, tagClients, tagClientNames, key, logger); err != nil { + if err := removeSharedTag(ctx, tagClients, key, logger); err != nil { return err } } else { @@ -1991,7 +2090,7 @@ func removeSharedTags(ctx context.Context, tagClients []*resourcegroupstaggingap return nil } -func removeSharedTag(ctx context.Context, tagClients []*resourcegroupstaggingapi.ResourceGroupsTaggingAPI, tagClientNames map[*resourcegroupstaggingapi.ResourceGroupsTaggingAPI]string, key string, logger logrus.FieldLogger) error { +func removeSharedTag(ctx context.Context, tagClients []*resourcegroupstaggingapi.ResourceGroupsTaggingAPI, key string, logger logrus.FieldLogger) error { request := &resourcegroupstaggingapi.UntagResourcesInput{ TagKeys: []*string{aws.String(key)}, } @@ -2001,7 +2100,7 @@ func removeSharedTag(ctx context.Context, tagClients []*resourcegroupstaggingapi for len(tagClients) > 0 { nextTagClients := tagClients[:0] for _, tagClient := range tagClients { - logger.Debugf("Search for and remove tags in %s matching %s: shared", tagClientNames[tagClient], key) + logger.Debugf("Search for and remove tags in %s matching %s: shared", *tagClient.Config.Region, key) arns := []string{} err := tagClient.GetResourcesPagesWithContext( ctx, @@ -2027,7 +2126,7 @@ func removeSharedTag(ctx context.Context, tagClients []*resourcegroupstaggingapi continue } if len(arns) == 0 { - logger.Debugf("No matches in %s for %s: shared, removing client", tagClientNames[tagClient], key) + logger.Debugf("No matches in %s for %s: shared, removing client", *tagClient.Config.Region, key) continue } nextTagClients = append(nextTagClients, tagClient) diff --git a/vendor/github.com/openshift/installer/pkg/types/openstack/platform.go b/vendor/github.com/openshift/installer/pkg/types/openstack/platform.go index d523dfe31db..80094d17455 100644 --- a/vendor/github.com/openshift/installer/pkg/types/openstack/platform.go +++ b/vendor/github.com/openshift/installer/pkg/types/openstack/platform.go @@ -26,8 +26,14 @@ type Platform struct { // LbFloatingIP is the IP address of an available floating IP in your OpenStack cluster // to associate with the OpenShift load balancer. + // Deprecated: this value has been renamed to apiFloatingIP. // +optional - LbFloatingIP string `json:"lbFloatingIP,omitempty"` + DeprecatedLbFloatingIP string `json:"lbFloatingIP,omitempty"` + + // APIFloatingIP is the IP address of an available floating IP in your OpenStack cluster + // to associate with the OpenShift API load balancer. 
+	// +optional
+	APIFloatingIP string `json:"apiFloatingIP,omitempty"`
 
 	// IngressFloatingIP is the ID of an available floating IP in your OpenStack cluster
 	// that will be associated with the OpenShift ingress port
diff --git a/vendor/modules.txt b/vendor/modules.txt
index c0c60798794..96a2f6b9b38 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -538,7 +538,7 @@ github.com/openshift/generic-admission-server/pkg/apiserver
 github.com/openshift/generic-admission-server/pkg/cmd
 github.com/openshift/generic-admission-server/pkg/cmd/server
 github.com/openshift/generic-admission-server/pkg/registry/admissionreview
-# github.com/openshift/installer v0.9.0-master.0.20201006081509-887875ea9cf9
+# github.com/openshift/installer v0.9.0-master.0.20201006081509-887875ea9cf9 => github.com/staebler/installer v0.9.0-master.0.20201010200101-5a09a63bb753
 ## explicit
 github.com/openshift/installer/data
 github.com/openshift/installer/pkg/asset/installconfig/aws
@@ -1778,3 +1778,4 @@ sourcegraph.com/sqs/pbtypes
 # k8s.io/client-go => k8s.io/client-go v0.19.0
 # github.com/openshift/library-go => github.com/openshift/library-go v0.0.0-20200918101923-1e4c94603efe
 # k8s.io/apiserver => github.com/staebler/apiserver v0.19.1-0.20201005174924-a3ef0d1e45df
+# github.com/openshift/installer => github.com/staebler/installer v0.9.0-master.0.20201010200101-5a09a63bb753

From e3bef6c48ed8d1c47b22b9bd955b53ecf2c0a115 Mon Sep 17 00:00:00 2001
From: staebler
Date: Fri, 5 Jun 2020 14:16:56 -0400
Subject: [PATCH 2/2] provide a means to abandon deprovisioning

When there are AWS resources that cannot be destroyed, the list of blocked
ARNs is added to the ClusterDeprovision status in the `blockedResources`
field. The user can then take action on those blocked resources.

The AWS destroyer will back off destroy attempts. Each attempt is limited to
5 minutes. The backoff starts at 5 minutes, doubles after each failed
attempt, and caps at 24 hours.

If the user wants to abandon a deprovision, the user can add the
"hive.openshift.io/abandon-deprovision" annotation to the ClusterDeployment.
When this annotation is present with a true value, the clusterdeployment
controller will remove the deprovision finalizer from the ClusterDeployment
without waiting for the ClusterDeprovision to complete.
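
As a rough sketch (not part of this patch), a caller with a configured
controller-runtime client and the hivev1 scheme registered could set the
annotation roughly as follows; the function name and arguments are
placeholders:

    // markAbandoned flags a ClusterDeployment so that the clusterdeployment
    // controller stops waiting for its ClusterDeprovision to finish.
    // Assumed imports: context, sigs.k8s.io/controller-runtime/pkg/client,
    // github.com/openshift/hive/pkg/apis/hive/v1 (as hivev1), and
    // github.com/openshift/hive/pkg/constants.
    func markAbandoned(ctx context.Context, c client.Client, namespace, name string) error {
        cd := &hivev1.ClusterDeployment{}
        if err := c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, cd); err != nil {
            return err
        }
        if cd.Annotations == nil {
            cd.Annotations = map[string]string{}
        }
        // constants.AbandonDeprovisionAnnotation is "hive.openshift.io/abandon-deprovision".
        cd.Annotations[constants.AbandonDeprovisionAnnotation] = "true"
        return c.Update(ctx, cd)
    }
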
https://issues.redhat.com/browse/CO-943 --- ...hive.openshift.io_clusterdeprovisions.yaml | 6 ++ contrib/pkg/deprovision/awstagdeprovision.go | 95 ++++++++++++++++--- pkg/apis/hive/v1/clusterdeprovision_types.go | 3 + pkg/apis/hive/v1/zz_generated.deepcopy.go | 5 + pkg/clusterresource/openstack.go | 2 +- pkg/constants/constants.go | 6 ++ .../clusterdeployment_controller.go | 14 +++ .../clusterdeployment_controller_test.go | 46 +++++++++ .../clusterdeprovision_controller.go | 2 +- .../clusterdeprovision_controller_test.go | 2 +- pkg/controller/utils/sa.go | 5 + pkg/install/generate.go | 11 ++- pkg/install/generate_test.go | 2 +- 13 files changed, 181 insertions(+), 18 deletions(-) diff --git a/config/crds/hive.openshift.io_clusterdeprovisions.yaml b/config/crds/hive.openshift.io_clusterdeprovisions.yaml index 26f995d9355..43f9b67e2a6 100644 --- a/config/crds/hive.openshift.io_clusterdeprovisions.yaml +++ b/config/crds/hive.openshift.io_clusterdeprovisions.yaml @@ -203,6 +203,12 @@ spec: status: description: ClusterDeprovisionStatus defines the observed state of ClusterDeprovision properties: + blockedResources: + description: BlockedResources is a list of cloud resources that the + deprovision has not been able to delete + items: + type: string + type: array completed: description: Completed is true when the uninstall has completed successfully type: boolean diff --git a/contrib/pkg/deprovision/awstagdeprovision.go b/contrib/pkg/deprovision/awstagdeprovision.go index 88bd63e5c74..614440b532d 100644 --- a/contrib/pkg/deprovision/awstagdeprovision.go +++ b/contrib/pkg/deprovision/awstagdeprovision.go @@ -1,6 +1,7 @@ package deprovision import ( + "context" "fmt" "io/ioutil" "os" @@ -10,24 +11,36 @@ import ( log "github.com/sirupsen/logrus" "github.com/spf13/cobra" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/tools/clientcmd" + "sigs.k8s.io/controller-runtime/pkg/client" "github.com/openshift/installer/pkg/destroy/aws" "github.com/openshift/library-go/pkg/controller/fileobserver" + hivev1 "github.com/openshift/hive/pkg/apis/hive/v1" "github.com/openshift/hive/pkg/constants" ) +type awsTagDeprovisionOpts struct { + logLevel string + region string + filters []aws.Filter + clusterDeprovision string + logger log.FieldLogger +} + // NewDeprovisionAWSWithTagsCommand is the entrypoint to create the 'aws-tag-deprovision' subcommand // TODO: Port to a sub-command of deprovision. func NewDeprovisionAWSWithTagsCommand() *cobra.Command { - opt := &aws.ClusterUninstaller{} - var logLevel string + opt := &awsTagDeprovisionOpts{} cmd := &cobra.Command{ Use: "aws-tag-deprovision KEY=VALUE ...", Short: "Deprovision AWS assets (as created by openshift-installer) with the given tag(s)", Long: "Deprovision AWS assets (as created by openshift-installer) with the given tag(s). 
A resource matches the filter if any of the key/value pairs are in its tags.", Run: func(cmd *cobra.Command, args []string) { - if err := completeAWSUninstaller(opt, logLevel, args); err != nil { + if err := opt.complete(args); err != nil { log.WithError(err).Error("Cannot complete command") return } @@ -79,36 +92,36 @@ func NewDeprovisionAWSWithTagsCommand() *cobra.Command { }() } - if err := opt.Run(); err != nil { + if err := opt.run(); err != nil { log.WithError(err).Fatal("Runtime error") } }, } flags := cmd.Flags() - flags.StringVar(&logLevel, "loglevel", "info", "log level, one of: debug, info, warn, error, fatal, panic") - flags.StringVar(&opt.Region, "region", "us-east-1", "AWS region to use") + flags.StringVar(&opt.logLevel, "loglevel", "info", "log level, one of: debug, info, warn, error, fatal, panic") + flags.StringVar(&opt.region, "region", "us-east-1", "AWS region to use") + flags.StringVar(&opt.clusterDeprovision, "clusterdeprovision", "", "name of the ClusterDeprovision in which to stored blocked resources") return cmd } -func completeAWSUninstaller(o *aws.ClusterUninstaller, logLevel string, args []string) error { - +func (o *awsTagDeprovisionOpts) complete(args []string) error { for _, arg := range args { filter := aws.Filter{} err := parseFilter(filter, arg) if err != nil { return fmt.Errorf("cannot parse filter %s: %v", arg, err) } - o.Filters = append(o.Filters, filter) + o.filters = append(o.filters, filter) } // Set log level - level, err := log.ParseLevel(logLevel) + level, err := log.ParseLevel(o.logLevel) if err != nil { log.WithError(err).Error("cannot parse log level") return err } - o.Logger = log.NewEntry(&log.Logger{ + o.logger = log.NewEntry(&log.Logger{ Out: os.Stdout, Formatter: &log.TextFormatter{ FullTimestamp: true, @@ -120,6 +133,66 @@ func completeAWSUninstaller(o *aws.ClusterUninstaller, logLevel string, args []s return nil } +func (o *awsTagDeprovisionOpts) run() error { + return wait.ExponentialBackoff( + // Start the backoff at 5 minutes, double it each time, to a cap of 24 hours. 
+ wait.Backoff{ + Duration: 5 * time.Minute, + Factor: 2, + Steps: 1 << 8, // large enough to make cap the effective bound + Cap: 24 * time.Hour, + }, + func() (done bool, returnErr error) { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + uninstaller := &aws.ClusterUninstaller{ + Filters: o.filters, + Logger: o.logger, + Region: o.region, + } + blockedResources, err := uninstaller.RunWithContext(ctx) + if len(blockedResources) == 0 { + return true, err + } + if o.clusterDeprovision == "" { + return + } + kubeconfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig( + clientcmd.NewDefaultClientConfigLoadingRules(), + &clientcmd.ConfigOverrides{}, + ) + namespace, _, err := kubeconfig.Namespace() + if err != nil { + o.logger.WithError(err).Error("could not get the namespace") + return + } + clientConfig, err := kubeconfig.ClientConfig() + if err != nil { + o.logger.WithError(err).Error("could not get the kube client config") + return + } + scheme := runtime.NewScheme() + hivev1.AddToScheme(scheme) + c, err := client.New(clientConfig, client.Options{Scheme: scheme}) + if err != nil { + o.logger.WithError(err).Error("could not get kube client") + return + } + clusterDeprovision := &hivev1.ClusterDeprovision{} + if err := c.Get(context.Background(), client.ObjectKey{Namespace: namespace, Name: o.clusterDeprovision}, clusterDeprovision); err != nil { + o.logger.WithError(err).Error("could not get ClusterDeprovision") + return + } + clusterDeprovision.Status.BlockedResources = blockedResources + if err := c.Status().Update(context.Background(), clusterDeprovision); err != nil { + o.logger.WithError(err).Error("could not update ClusterDeprovision") + return + } + return + }, + ) +} + func parseFilter(filterMap aws.Filter, str string) error { parts := strings.SplitN(str, "=", 2) if len(parts) != 2 { diff --git a/pkg/apis/hive/v1/clusterdeprovision_types.go b/pkg/apis/hive/v1/clusterdeprovision_types.go index b9f4b4ac6d1..c2cbdeb2540 100644 --- a/pkg/apis/hive/v1/clusterdeprovision_types.go +++ b/pkg/apis/hive/v1/clusterdeprovision_types.go @@ -25,6 +25,9 @@ type ClusterDeprovisionStatus struct { // Conditions includes more detailed status for the cluster deprovision // +optional Conditions []ClusterDeprovisionCondition `json:"conditions,omitempty"` + + // BlockedResources is a list of cloud resources that the deprovision has not been able to delete + BlockedResources []string `json:"blockedResources,omitempty"` } // ClusterDeprovisionPlatform contains platform-specific configuration for the diff --git a/pkg/apis/hive/v1/zz_generated.deepcopy.go b/pkg/apis/hive/v1/zz_generated.deepcopy.go index 642e78272d8..fe9bfb2fe45 100644 --- a/pkg/apis/hive/v1/zz_generated.deepcopy.go +++ b/pkg/apis/hive/v1/zz_generated.deepcopy.go @@ -780,6 +780,11 @@ func (in *ClusterDeprovisionStatus) DeepCopyInto(out *ClusterDeprovisionStatus) (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.BlockedResources != nil { + in, out := &in.BlockedResources, &out.BlockedResources + *out = make([]string, len(*in)) + copy(*out, *in) + } return } diff --git a/pkg/clusterresource/openstack.go b/pkg/clusterresource/openstack.go index 8af65e8a3dc..31588db0cf0 100644 --- a/pkg/clusterresource/openstack.go +++ b/pkg/clusterresource/openstack.go @@ -83,7 +83,7 @@ func (p *OpenStackCloudBuilder) addInstallConfigPlatform(o *Builder, ic *install Cloud: p.Cloud, ExternalNetwork: p.ExternalNetwork, FlavorName: p.ComputeFlavor, - LbFloatingIP: p.APIFloatingIP, + APIFloatingIP: p.APIFloatingIP, }, 
} diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go index d5d3828f881..7ef277a3ab3 100644 --- a/pkg/constants/constants.go +++ b/pkg/constants/constants.go @@ -158,6 +158,12 @@ const ( // An incoming status indicates that the resource is on the destination side of an in-progress relocate. RelocateAnnotation = "hive.openshift.io/relocate" + // AbandonDeprovisionAnnotation is an annotation used on ClusterDeployments to indicate that attempts to deprovision + // the cluster should be abandoned. This is used when there are cloud resources that cannot be deleted without user + // intervention. Prior to abandoning, the user should collect the list of blocked resources from the + // ClusterDeprovision. + AbandonDeprovisionAnnotation = "hive.openshift.io/abandon-deprovision" + // ManagedDomainsFileEnvVar if present, points to a simple text // file that includes a valid managed domain per line. Cluster deployments // requesting that their domains be managed must have a base domain diff --git a/pkg/controller/clusterdeployment/clusterdeployment_controller.go b/pkg/controller/clusterdeployment/clusterdeployment_controller.go index e7887ac5748..7192d789e97 100644 --- a/pkg/controller/clusterdeployment/clusterdeployment_controller.go +++ b/pkg/controller/clusterdeployment/clusterdeployment_controller.go @@ -1296,6 +1296,20 @@ func (r *ReconcileClusterDeployment) ensureClusterDeprovisioned(cd *hivev1.Clust cdLog.Info("PreserveOnDelete=true but creating deprovisioning request as cluster was never successfully provisioned") } + // Stop waiting for deprovision if the abandon-deprovision annotation is true + if value, ok := cd.Annotations[constants.AbandonDeprovisionAnnotation]; ok { + logger := cdLog.WithField(constants.AbandonDeprovisionAnnotation, value) + if abandon, err := strconv.ParseBool(value); abandon && err == nil { + logger.Warn("adandoning deprovision") + err = r.removeClusterDeploymentFinalizer(cd, cdLog) + if err != nil { + cdLog.WithError(err).Log(controllerutils.LogLevel(err), "error removing finalizer") + } + return true, err + } + logger.Debug("ignoring abandon-deprovision annotation") + } + if cd.Spec.ClusterMetadata == nil { cdLog.Warn("skipping uninstall for cluster that never had clusterID set") return true, nil diff --git a/pkg/controller/clusterdeployment/clusterdeployment_controller_test.go b/pkg/controller/clusterdeployment/clusterdeployment_controller_test.go index 02acf34b626..4fc61aee2d0 100644 --- a/pkg/controller/clusterdeployment/clusterdeployment_controller_test.go +++ b/pkg/controller/clusterdeployment/clusterdeployment_controller_test.go @@ -494,6 +494,52 @@ func TestClusterDeploymentReconcile(t *testing.T) { assert.Nil(t, deprovision, "expected no deprovision request") }, }, + { + name: "Test abandon deprovison annotation", + existing: []runtime.Object{ + func() *hivev1.ClusterDeployment { + cd := testDeletedClusterDeployment() + if cd.Annotations == nil { + cd.Annotations = make(map[string]string, 1) + } + cd.Annotations[constants.AbandonDeprovisionAnnotation] = "true" + return cd + }(), + testSecret(corev1.SecretTypeDockerConfigJson, pullSecretSecret, corev1.DockerConfigJsonKey, "{}"), + testSecret(corev1.SecretTypeDockerConfigJson, constants.GetMergedPullSecretName(testClusterDeployment()), corev1.DockerConfigJsonKey, "{}"), + }, + validate: func(c client.Client, t *testing.T) { + cd := getCD(c) + if assert.NotNil(t, cd, "missing clusterdeployment") { + assert.Empty(t, cd.Finalizers, "expected empty finalizers") + } + deprovision := 
getDeprovision(c) + assert.Nil(t, deprovision, "expected no deprovision request") + }, + }, + { + name: "Test ignored abandon deprovison annotation", + existing: []runtime.Object{ + func() *hivev1.ClusterDeployment { + cd := testDeletedClusterDeployment() + if cd.Annotations == nil { + cd.Annotations = make(map[string]string, 1) + } + cd.Annotations[constants.AbandonDeprovisionAnnotation] = "false" + return cd + }(), + testSecret(corev1.SecretTypeDockerConfigJson, pullSecretSecret, corev1.DockerConfigJsonKey, "{}"), + testSecret(corev1.SecretTypeDockerConfigJson, constants.GetMergedPullSecretName(testClusterDeployment()), corev1.DockerConfigJsonKey, "{}"), + }, + validate: func(c client.Client, t *testing.T) { + cd := getCD(c) + if assert.NotNil(t, cd, "missing clusterdeployment") { + assert.Contains(t, cd.Finalizers, hivev1.FinalizerDeprovision, "expected deprovision finalizer") + } + deprovision := getDeprovision(c) + assert.NotNil(t, deprovision, "expected deprovision request") + }, + }, { name: "Test creation of uninstall job when PreserveOnDelete is true but cluster deployment is not installed", existing: []runtime.Object{ diff --git a/pkg/controller/clusterdeprovision/clusterdeprovision_controller.go b/pkg/controller/clusterdeprovision/clusterdeprovision_controller.go index e0631251d7f..308b66c8b89 100644 --- a/pkg/controller/clusterdeprovision/clusterdeprovision_controller.go +++ b/pkg/controller/clusterdeprovision/clusterdeprovision_controller.go @@ -276,7 +276,7 @@ func (r *ReconcileClusterDeprovision) Reconcile(request reconcile.Request) (reco // Generate an uninstall job rLog.Debug("generating uninstall job") - uninstallJob, err := install.GenerateUninstallerJobForDeprovision(instance) + uninstallJob, err := install.GenerateUninstallerJobForDeprovision(instance, controllerutils.ServiceAccountName) if err != nil { rLog.Errorf("error generating uninstaller job: %v", err) return reconcile.Result{}, err diff --git a/pkg/controller/clusterdeprovision/clusterdeprovision_controller_test.go b/pkg/controller/clusterdeprovision/clusterdeprovision_controller_test.go index 3cc5e42e1ba..26b2b75714b 100644 --- a/pkg/controller/clusterdeprovision/clusterdeprovision_controller_test.go +++ b/pkg/controller/clusterdeprovision/clusterdeprovision_controller_test.go @@ -344,7 +344,7 @@ func testClusterDeployment() *hivev1.ClusterDeployment { } func testUninstallJob() *batchv1.Job { - uninstallJob, _ := install.GenerateUninstallerJobForDeprovision(testClusterDeprovision()) + uninstallJob, _ := install.GenerateUninstallerJobForDeprovision(testClusterDeprovision(), "test-service-account") hash, err := controllerutils.CalculateJobSpecHash(uninstallJob) if err != nil { panic("should never get error calculating job spec hash") diff --git a/pkg/controller/utils/sa.go b/pkg/controller/utils/sa.go index 0981ee2edf5..6535deb466a 100644 --- a/pkg/controller/utils/sa.go +++ b/pkg/controller/utils/sa.go @@ -45,6 +45,11 @@ var ( Resources: []string{"clusterprovisions", "clusterprovisions/finalizers", "clusterprovisions/status"}, Verbs: []string{"get", "list", "update", "watch"}, }, + { + APIGroups: []string{"hive.openshift.io"}, + Resources: []string{"clusterdeprovisions", "clusterdeprovisions/status"}, + Verbs: []string{"get", "update"}, + }, } ) diff --git a/pkg/install/generate.go b/pkg/install/generate.go index 0ab5ad97c05..0ea5350c71a 100644 --- a/pkg/install/generate.go +++ b/pkg/install/generate.go @@ -445,13 +445,16 @@ func GetUninstallJobName(name string) string { // GenerateUninstallerJobForDeprovision 
generates an uninstaller job for a given deprovision request func GenerateUninstallerJobForDeprovision( - req *hivev1.ClusterDeprovision) (*batchv1.Job, error) { + req *hivev1.ClusterDeprovision, + serviceAccountName string, +) (*batchv1.Job, error) { restartPolicy := corev1.RestartPolicyOnFailure podSpec := corev1.PodSpec{ - DNSPolicy: corev1.DNSClusterFirst, - RestartPolicy: restartPolicy, + DNSPolicy: corev1.DNSClusterFirst, + RestartPolicy: restartPolicy, + ServiceAccountName: serviceAccountName, } completions := int32(1) @@ -514,6 +517,8 @@ func completeAWSDeprovisionJob(req *hivev1.ClusterDeprovision, job *batchv1.Job) "debug", "--region", req.Spec.Platform.AWS.Region, + "--clusterdeprovision", + req.Name, fmt.Sprintf("kubernetes.io/cluster/%s=owned", req.Spec.InfraID), }, }, diff --git a/pkg/install/generate_test.go b/pkg/install/generate_test.go index ba00782fdff..e0d78d7d31e 100644 --- a/pkg/install/generate_test.go +++ b/pkg/install/generate_test.go @@ -22,7 +22,7 @@ func init() { func TestGenerateDeprovision(t *testing.T) { dr := testClusterDeprovision() - job, err := GenerateUninstallerJobForDeprovision(dr) + job, err := GenerateUninstallerJobForDeprovision(dr, "test-service-account") assert.Nil(t, err) assert.NotNil(t, job) }