6 changes: 6 additions & 0 deletions config/crds/hive.openshift.io_clusterdeprovisions.yaml
@@ -203,6 +203,12 @@ spec:
status:
description: ClusterDeprovisionStatus defines the observed state of ClusterDeprovision
properties:
blockedResources:
description: BlockedResources is a list of cloud resources that the
deprovision has not been able to delete
items:
type: string
type: array
completed:
description: Completed is true when the uninstall has completed successfully
type: boolean
95 changes: 84 additions & 11 deletions contrib/pkg/deprovision/awstagdeprovision.go
@@ -1,6 +1,7 @@
package deprovision

import (
"context"
"fmt"
"io/ioutil"
"os"
@@ -10,24 +11,36 @@ import (

log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/tools/clientcmd"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/openshift/installer/pkg/destroy/aws"
"github.com/openshift/library-go/pkg/controller/fileobserver"

hivev1 "github.com/openshift/hive/pkg/apis/hive/v1"
"github.com/openshift/hive/pkg/constants"
)

type awsTagDeprovisionOpts struct {
logLevel string
region string
filters []aws.Filter
clusterDeprovision string
logger log.FieldLogger
}

// NewDeprovisionAWSWithTagsCommand is the entrypoint to create the 'aws-tag-deprovision' subcommand
// TODO: Port to a sub-command of deprovision.
func NewDeprovisionAWSWithTagsCommand() *cobra.Command {
opt := &aws.ClusterUninstaller{}
var logLevel string
opt := &awsTagDeprovisionOpts{}
cmd := &cobra.Command{
Use: "aws-tag-deprovision KEY=VALUE ...",
Short: "Deprovision AWS assets (as created by openshift-installer) with the given tag(s)",
Long: "Deprovision AWS assets (as created by openshift-installer) with the given tag(s). A resource matches the filter if any of the key/value pairs are in its tags.",
Run: func(cmd *cobra.Command, args []string) {
if err := completeAWSUninstaller(opt, logLevel, args); err != nil {
if err := opt.complete(args); err != nil {
log.WithError(err).Error("Cannot complete command")
return
}
@@ -79,36 +92,36 @@ func NewDeprovisionAWSWithTagsCommand() *cobra.Command {
}()
}

if err := opt.Run(); err != nil {
if err := opt.run(); err != nil {
log.WithError(err).Fatal("Runtime error")
}
},
}
flags := cmd.Flags()
flags.StringVar(&logLevel, "loglevel", "info", "log level, one of: debug, info, warn, error, fatal, panic")
flags.StringVar(&opt.Region, "region", "us-east-1", "AWS region to use")
flags.StringVar(&opt.logLevel, "loglevel", "info", "log level, one of: debug, info, warn, error, fatal, panic")
flags.StringVar(&opt.region, "region", "us-east-1", "AWS region to use")
flags.StringVar(&opt.clusterDeprovision, "clusterdeprovision", "", "name of the ClusterDeprovision in which to store blocked resources")
return cmd
}

func completeAWSUninstaller(o *aws.ClusterUninstaller, logLevel string, args []string) error {

func (o *awsTagDeprovisionOpts) complete(args []string) error {
for _, arg := range args {
filter := aws.Filter{}
err := parseFilter(filter, arg)
if err != nil {
return fmt.Errorf("cannot parse filter %s: %v", arg, err)
}
o.Filters = append(o.Filters, filter)
o.filters = append(o.filters, filter)
}

// Set log level
level, err := log.ParseLevel(logLevel)
level, err := log.ParseLevel(o.logLevel)
if err != nil {
log.WithError(err).Error("cannot parse log level")
return err
}

o.Logger = log.NewEntry(&log.Logger{
o.logger = log.NewEntry(&log.Logger{
Out: os.Stdout,
Formatter: &log.TextFormatter{
FullTimestamp: true,
@@ -120,6 +133,66 @@ func completeAWSUninstaller(o *aws.ClusterUninstaller, logLevel string, args []s
return nil
}

func (o *awsTagDeprovisionOpts) run() error {
return wait.ExponentialBackoff(
// Start the backoff at 5 minutes, double it each time, to a cap of 24 hours.
wait.Backoff{
Duration: 5 * time.Minute,
Factor: 2,
Steps: 1 << 8, // large enough to make cap the effective bound
Cap: 24 * time.Hour,
},
func() (done bool, returnErr error) {
Contributor:

nit: named returns are not great, as they allow a bare return in the function body, which makes it difficult to read what is being returned and where it was set.
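For illustration, a minimal sketch of the readability concern (hypothetical functions, not code from this PR):

```go
package main

import "fmt"

// With named returns, a bare `return` hides what is actually being returned.
func fetchNamed(key string) (val string, err error) {
	if key == "" {
		err = fmt.Errorf("empty key")
		return // implicitly returns ("", err); easy to misread
	}
	val = "value-for-" + key
	return // implicitly returns (val, nil)
}

// With explicit returns, every exit point states its values.
func fetchExplicit(key string) (string, error) {
	if key == "" {
		return "", fmt.Errorf("empty key")
	}
	return "value-for-" + key, nil
}
```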

ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
uninstaller := &aws.ClusterUninstaller{
Filters: o.filters,
Logger: o.logger,
Region: o.region,
}
blockedResources, err := uninstaller.RunWithContext(ctx)
if len(blockedResources) == 0 {
return true, err
}
if o.clusterDeprovision == "" {
return
Contributor:

Do we not also return err here even if we will not be posting status to a ClusterDeprovision object?

Contributor (author):

If there are any blocked resources, then we want to keep trying to uninstall in the backoff loop. If we return err here, then the uninstall will stop and the pod will fail.

However, maybe it would be good to distinguish between an error because the context expired and an error for other reasons. In the former case, we want to keep trying. In the latter case, we want to abort. I'll look into that.
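A minimal sketch of the distinction the author describes, assuming the uninstaller surfaces the context's timeout error (the `retryDecision` helper is hypothetical):

```go
package deprovision

import (
	"context"
	"errors"
)

// retryDecision maps an uninstall attempt onto wait.ExponentialBackoff's
// (done, err) contract: a timeout retries, any other error aborts.
func retryDecision(blocked []string, err error) (bool, error) {
	if err != nil {
		if errors.Is(err, context.DeadlineExceeded) {
			return false, nil // per-attempt context expired; keep trying
		}
		return false, err // non-timeout errors abort the backoff loop
	}
	return len(blocked) == 0, nil // done only once nothing is blocked
}
```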

}
kubeconfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(
clientcmd.NewDefaultClientConfigLoadingRules(),
&clientcmd.ConfigOverrides{},
)
namespace, _, err := kubeconfig.Namespace()
if err != nil {
o.logger.WithError(err).Error("could not get the namespace")
return
}
Contributor (on lines +164 to +168):

Does this use the current context from the kubeconfig to get the namespace? How many users would use something like that? I personally haven't. Should we also allow setting the namespace directly?
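One possible shape for that, sketched with a hypothetical --namespace flag that falls back to the kubeconfig's current context when unset:

```go
import "k8s.io/client-go/tools/clientcmd"

// resolveNamespace prefers an explicitly provided namespace (e.g. from a
// --namespace flag) and otherwise falls back to the kubeconfig context,
// matching the lookup the PR already performs.
func resolveNamespace(explicit string) (string, error) {
	if explicit != "" {
		return explicit, nil
	}
	kubeconfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(
		clientcmd.NewDefaultClientConfigLoadingRules(),
		&clientcmd.ConfigOverrides{},
	)
	namespace, _, err := kubeconfig.Namespace()
	return namespace, err
}
```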

clientConfig, err := kubeconfig.ClientConfig()
if err != nil {
o.logger.WithError(err).Error("could not get the kube client config")
return
}
scheme := runtime.NewScheme()
hivev1.AddToScheme(scheme)
c, err := client.New(clientConfig, client.Options{Scheme: scheme})
if err != nil {
o.logger.WithError(err).Error("could not get kube client")
return
}
clusterDeprovision := &hivev1.ClusterDeprovision{}
if err := c.Get(context.Background(), client.ObjectKey{Namespace: namespace, Name: o.clusterDeprovision}, clusterDeprovision); err != nil {
o.logger.WithError(err).Error("could not get ClusterDeprovision")
return
}
clusterDeprovision.Status.BlockedResources = blockedResources
if err := c.Status().Update(context.Background(), clusterDeprovision); err != nil {
o.logger.WithError(err).Error("could not update ClusterDeprovision")
return
}
return
},
)
}

func parseFilter(filterMap aws.Filter, str string) error {
parts := strings.SplitN(str, "=", 2)
if len(parts) != 2 {
2 changes: 2 additions & 0 deletions go.mod
@@ -90,3 +90,5 @@ replace github.com/openshift/library-go => github.com/openshift/library-go v0.0.

// temporary hack fix for https://github.com/kubernetes/kubernetes/issues/95300
replace k8s.io/apiserver => github.com/staebler/apiserver v0.19.1-0.20201005174924-a3ef0d1e45df

replace github.com/openshift/installer => github.com/staebler/installer v0.9.0-master.0.20201010200101-5a09a63bb753
4 changes: 2 additions & 2 deletions go.sum
@@ -1317,8 +1317,6 @@ github.com/openshift/cluster-autoscaler-operator v0.0.0-20190521201101-62768a6ba
github.com/openshift/cluster-version-operator v3.11.1-0.20190629164025-08cac1c02538+incompatible/go.mod h1:0BbpR1mrN0F2ZRae5N1XHcytmkvVPaeKgSQwRRBWugc=
github.com/openshift/generic-admission-server v1.14.1-0.20200903115324-4ddcdd976480 h1:y47BAJFepK8Xls1c+quIOyc46OXiT9LRiqGVjIaMlSA=
github.com/openshift/generic-admission-server v1.14.1-0.20200903115324-4ddcdd976480/go.mod h1:OAHL5WnZphlhVEf5fTdeGLvNwMu1B2zCWpmxJpCA35o=
github.com/openshift/installer v0.9.0-master.0.20201006081509-887875ea9cf9 h1:sW+qISOi7iHyXrAdl/lUANiCc4CNrYqjTmyshUyZKMs=
github.com/openshift/installer v0.9.0-master.0.20201006081509-887875ea9cf9/go.mod h1:gj5hubG057ciWCCBUq2+BK29ELoySn5H1vSP842n/uU=
github.com/openshift/library-go v0.0.0-20200918101923-1e4c94603efe h1:MJqGN0NVONnTLDYPVIEH4uo6i3gAK7LAkhLnyFO8Je0=
github.com/openshift/library-go v0.0.0-20200918101923-1e4c94603efe/go.mod h1:NI6xOQGuTnLXeHW8Z2glKSFhF7X+YxlAlqlBMaK0zEM=
github.com/openshift/machine-api-operator v0.0.0-20190312153711-9650e16c9880/go.mod h1:7HeAh0v04zQn1L+4ItUjvpBQYsm2Nf81WaZLiXTcnkc=
@@ -1589,6 +1587,8 @@ github.com/spf13/viper v1.6.1 h1:VPZzIkznI1YhVMRi6vNFLHSwhnhReBfgTxIPccpfdZk=
github.com/spf13/viper v1.6.1/go.mod h1:t3iDnF5Jlj76alVNuyFBk5oUMCvsrkbvZK0WQdfDi5k=
github.com/staebler/apiserver v0.19.1-0.20201005174924-a3ef0d1e45df h1:tOLmJyPkBiNtX5vm5bMRIsCwjEljkNeeoB0IBi5V6VU=
github.com/staebler/apiserver v0.19.1-0.20201005174924-a3ef0d1e45df/go.mod h1:XvzqavYj73931x7FLtyagh8WibHpePJ1QwWrSJs2CLk=
github.com/staebler/installer v0.9.0-master.0.20201010200101-5a09a63bb753 h1:qG3ZoJqMVTCf22CCvxtEGrQ/SrwxuIzzMrrcEQBCFZE=
github.com/staebler/installer v0.9.0-master.0.20201010200101-5a09a63bb753/go.mod h1:gj5hubG057ciWCCBUq2+BK29ELoySn5H1vSP842n/uU=
github.com/stoewer/go-strcase v1.0.2/go.mod h1:eLfe5bL3qbL7ep/KafHzthxejrOF5J3xmt03uL5tzek=
github.com/stoewer/go-strcase v1.1.0/go.mod h1:G7YglbHPK5jX3JcWljxVXRXPh90/dtxfy6xWqxu5b90=
github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw=
3 changes: 3 additions & 0 deletions pkg/apis/hive/v1/clusterdeprovision_types.go
@@ -25,6 +25,9 @@ type ClusterDeprovisionStatus struct {
// Conditions includes more detailed status for the cluster deprovision
// +optional
Conditions []ClusterDeprovisionCondition `json:"conditions,omitempty"`

// BlockedResources is a list of cloud resources that the deprovision has not been able to delete
BlockedResources []string `json:"blockedResources,omitempty"`
Contributor:

How are things looking for the other cloud providers? Will a flat list of strings be sufficient as far as we can see? Wondering whether we should go with something that lets us store a little more data.

Contributor (author):

That's a good idea. I can change BlockedResources to a struct that for now has nothing but a name field.
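The change the author describes might look roughly like this (a sketch, not the final API):

```go
// BlockedResource is a hypothetical richer replacement for a bare string.
// Starting with only a name preserves today's behavior, while fields such
// as a resource type or failure reason can be added later without breaking
// the API the way extending a flat []string would.
type BlockedResource struct {
	// Name identifies the cloud resource that could not be deleted.
	Name string `json:"name"`
}

// ClusterDeprovisionStatus would then hold:
//   BlockedResources []BlockedResource `json:"blockedResources,omitempty"`
```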

}

// ClusterDeprovisionPlatform contains platform-specific configuration for the
5 changes: 5 additions & 0 deletions pkg/apis/hive/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion pkg/clusterresource/openstack.go
@@ -83,7 +83,7 @@ func (p *OpenStackCloudBuilder) addInstallConfigPlatform(o *Builder, ic *install
Cloud: p.Cloud,
ExternalNetwork: p.ExternalNetwork,
FlavorName: p.ComputeFlavor,
LbFloatingIP: p.APIFloatingIP,
APIFloatingIP: p.APIFloatingIP,
},
}

6 changes: 6 additions & 0 deletions pkg/constants/constants.go
@@ -158,6 +158,12 @@ const (
// An incoming status indicates that the resource is on the destination side of an in-progress relocate.
RelocateAnnotation = "hive.openshift.io/relocate"

// AbandonDeprovisionAnnotation is an annotation used on ClusterDeployments to indicate that attempts to deprovision
// the cluster should be abandoned. This is used when there are cloud resources that cannot be deleted without user
// intervention. Prior to abandoning, the user should collect the list of blocked resources from the
// ClusterDeprovision.
AbandonDeprovisionAnnotation = "hive.openshift.io/abandon-deprovision"

// ManagedDomainsFileEnvVar if present, points to a simple text
// file that includes a valid managed domain per line. Cluster deployments
// requesting that their domains be managed must have a base domain
14 changes: 14 additions & 0 deletions pkg/controller/clusterdeployment/clusterdeployment_controller.go
@@ -1296,6 +1296,20 @@ func (r *ReconcileClusterDeployment) ensureClusterDeprovisioned(cd *hivev1.Clust
cdLog.Info("PreserveOnDelete=true but creating deprovisioning request as cluster was never successfully provisioned")
}

// Stop waiting for deprovision if the abandon-deprovision annotation is true
if value, ok := cd.Annotations[constants.AbandonDeprovisionAnnotation]; ok {
logger := cdLog.WithField(constants.AbandonDeprovisionAnnotation, value)
if abandon, err := strconv.ParseBool(value); abandon && err == nil {
Contributor:

Suggested change
if abandon, err := strconv.ParseBool(value); abandon && err == nil {
if abandon, err := strconv.ParseBool(value); err == nil && abandon {

Wouldn't that be more appropriate? Checking the error before the parsed value is the conventional order and makes the intent clearer.

logger.Warn("abandoning deprovision")
err = r.removeClusterDeploymentFinalizer(cd, cdLog)
if err != nil {
cdLog.WithError(err).Log(controllerutils.LogLevel(err), "error removing finalizer")
Contributor:

We will return deprovisioned = true even when there was an error removing the finalizer. Shouldn't we return false here?
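The fix the reviewer suggests would be roughly (a sketch against the code above, not the merged change):

```go
if err := r.removeClusterDeploymentFinalizer(cd, cdLog); err != nil {
	cdLog.WithError(err).Log(controllerutils.LogLevel(err), "error removing finalizer")
	return false, err // do not report deprovisioned until the finalizer is gone
}
return true, nil
```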

}
return true, err
}
logger.Debug("ignoring abandon-deprovision annotation")
}

if cd.Spec.ClusterMetadata == nil {
cdLog.Warn("skipping uninstall for cluster that never had clusterID set")
return true, nil
@@ -494,6 +494,52 @@ func TestClusterDeploymentReconcile(t *testing.T) {
assert.Nil(t, deprovision, "expected no deprovision request")
},
},
{
name: "Test abandon deprovison annotation",
existing: []runtime.Object{
func() *hivev1.ClusterDeployment {
cd := testDeletedClusterDeployment()
if cd.Annotations == nil {
cd.Annotations = make(map[string]string, 1)
}
cd.Annotations[constants.AbandonDeprovisionAnnotation] = "true"
return cd
}(),
testSecret(corev1.SecretTypeDockerConfigJson, pullSecretSecret, corev1.DockerConfigJsonKey, "{}"),
testSecret(corev1.SecretTypeDockerConfigJson, constants.GetMergedPullSecretName(testClusterDeployment()), corev1.DockerConfigJsonKey, "{}"),
},
validate: func(c client.Client, t *testing.T) {
cd := getCD(c)
if assert.NotNil(t, cd, "missing clusterdeployment") {
assert.Empty(t, cd.Finalizers, "expected empty finalizers")
}
deprovision := getDeprovision(c)
assert.Nil(t, deprovision, "expected no deprovision request")
},
},
{
name: "Test ignored abandon deprovison annotation",
existing: []runtime.Object{
func() *hivev1.ClusterDeployment {
cd := testDeletedClusterDeployment()
if cd.Annotations == nil {
cd.Annotations = make(map[string]string, 1)
}
cd.Annotations[constants.AbandonDeprovisionAnnotation] = "false"
return cd
}(),
testSecret(corev1.SecretTypeDockerConfigJson, pullSecretSecret, corev1.DockerConfigJsonKey, "{}"),
testSecret(corev1.SecretTypeDockerConfigJson, constants.GetMergedPullSecretName(testClusterDeployment()), corev1.DockerConfigJsonKey, "{}"),
},
validate: func(c client.Client, t *testing.T) {
cd := getCD(c)
if assert.NotNil(t, cd, "missing clusterdeployment") {
assert.Contains(t, cd.Finalizers, hivev1.FinalizerDeprovision, "expected deprovision finalizer")
}
deprovision := getDeprovision(c)
assert.NotNil(t, deprovision, "expected deprovision request")
},
},
{
name: "Test creation of uninstall job when PreserveOnDelete is true but cluster deployment is not installed",
existing: []runtime.Object{
@@ -276,7 +276,7 @@ func (r *ReconcileClusterDeprovision) Reconcile(request reconcile.Request) (reco

// Generate an uninstall job
rLog.Debug("generating uninstall job")
uninstallJob, err := install.GenerateUninstallerJobForDeprovision(instance)
uninstallJob, err := install.GenerateUninstallerJobForDeprovision(instance, controllerutils.ServiceAccountName)
if err != nil {
rLog.Errorf("error generating uninstaller job: %v", err)
return reconcile.Result{}, err
@@ -344,7 +344,7 @@ func testClusterDeployment() *hivev1.ClusterDeployment {
}

func testUninstallJob() *batchv1.Job {
uninstallJob, _ := install.GenerateUninstallerJobForDeprovision(testClusterDeprovision())
uninstallJob, _ := install.GenerateUninstallerJobForDeprovision(testClusterDeprovision(), "test-service-account")
hash, err := controllerutils.CalculateJobSpecHash(uninstallJob)
if err != nil {
panic("should never get error calculating job spec hash")
5 changes: 5 additions & 0 deletions pkg/controller/utils/sa.go
@@ -45,6 +45,11 @@ var (
Resources: []string{"clusterprovisions", "clusterprovisions/finalizers", "clusterprovisions/status"},
Verbs: []string{"get", "list", "update", "watch"},
},
{
APIGroups: []string{"hive.openshift.io"},
Resources: []string{"clusterdeprovisions", "clusterdeprovisions/status"},
Verbs: []string{"get", "update"},
},
}
)

11 changes: 8 additions & 3 deletions pkg/install/generate.go
@@ -445,13 +445,16 @@ func GetUninstallJobName(name string) string {

// GenerateUninstallerJobForDeprovision generates an uninstaller job for a given deprovision request
func GenerateUninstallerJobForDeprovision(
req *hivev1.ClusterDeprovision) (*batchv1.Job, error) {
req *hivev1.ClusterDeprovision,
serviceAccountName string,
) (*batchv1.Job, error) {

restartPolicy := corev1.RestartPolicyOnFailure

podSpec := corev1.PodSpec{
DNSPolicy: corev1.DNSClusterFirst,
RestartPolicy: restartPolicy,
DNSPolicy: corev1.DNSClusterFirst,
RestartPolicy: restartPolicy,
ServiceAccountName: serviceAccountName,
}

completions := int32(1)
@@ -514,6 +517,8 @@ func completeAWSDeprovisionJob(req *hivev1.ClusterDeprovision, job *batchv1.Job)
"debug",
"--region",
req.Spec.Platform.AWS.Region,
"--clusterdeprovision",
req.Name,
fmt.Sprintf("kubernetes.io/cluster/%s=owned", req.Spec.InfraID),
},
},
2 changes: 1 addition & 1 deletion pkg/install/generate_test.go
@@ -22,7 +22,7 @@ func init() {

func TestGenerateDeprovision(t *testing.T) {
dr := testClusterDeprovision()
job, err := GenerateUninstallerJobForDeprovision(dr)
job, err := GenerateUninstallerJobForDeprovision(dr, "test-service-account")
assert.Nil(t, err)
assert.NotNil(t, job)
}