diff --git a/pkg/controller/clusterclaim/clusterclaim_controller.go b/pkg/controller/clusterclaim/clusterclaim_controller.go index 51e90f5f105..0c87460dea7 100644 --- a/pkg/controller/clusterclaim/clusterclaim_controller.go +++ b/pkg/controller/clusterclaim/clusterclaim_controller.go @@ -263,7 +263,8 @@ func (r *ReconcileClusterClaim) Reconcile(ctx context.Context, request reconcile switch cd.Spec.ClusterPoolRef.ClaimName { case "": - return r.reconcileForNewAssignment(claim, cd, logger) + logger.Debugf("clusterdeployment %s has not yet been assigned to claim", cd.Name) + return reconcile.Result{}, nil case claim.Name: return r.reconcileForExistingAssignment(claim, cd, logger) default: @@ -428,17 +429,6 @@ func (r *ReconcileClusterClaim) reconcileForDeletedCluster(claim *hivev1.Cluster return reconcile.Result{}, nil } -func (r *ReconcileClusterClaim) reconcileForNewAssignment(claim *hivev1.ClusterClaim, cd *hivev1.ClusterDeployment, logger log.FieldLogger) (reconcile.Result, error) { - logger.Info("cluster assigned to claim") - cd.Spec.ClusterPoolRef.ClaimName = claim.Name - cd.Spec.PowerState = hivev1.RunningClusterPowerState - if err := r.Update(context.Background(), cd); err != nil { - logger.WithError(err).Log(controllerutils.LogLevel(err), "could not set claim for ClusterDeployment") - return reconcile.Result{}, err - } - return r.reconcileForExistingAssignment(claim, cd, logger) -} - func (r *ReconcileClusterClaim) reconcileForExistingAssignment(claim *hivev1.ClusterClaim, cd *hivev1.ClusterDeployment, logger log.FieldLogger) (reconcile.Result, error) { logger.Debug("claim has existing cluster assignment") if err := r.createRBAC(claim, cd, logger); err != nil { diff --git a/pkg/controller/clusterclaim/clusterclaim_controller_test.go b/pkg/controller/clusterclaim/clusterclaim_controller_test.go index dc3ad6a4fa6..23545bf65f0 100644 --- a/pkg/controller/clusterclaim/clusterclaim_controller_test.go +++ b/pkg/controller/clusterclaim/clusterclaim_controller_test.go @@ -124,7 +124,7 @@ func TestReconcileClusterClaim(t *testing.T) { }, }, { - name: "new assignment", + name: "unassigned CD is a no-op", claim: initializedClaimBuilder.Build(testclaim.WithCluster(clusterName)), cd: cdBuilder.Build( testcd.WithUnclaimedClusterPoolReference(claimNamespace, "test-pool"), @@ -133,20 +133,14 @@ func TestReconcileClusterClaim(t *testing.T) { Status: corev1.ConditionTrue, }, )), - expectCompletedClaim: true, - expectRBAC: true, expectedConditions: []hivev1.ClusterClaimCondition{ { - Type: hivev1.ClusterClaimPendingCondition, - Status: corev1.ConditionFalse, - Reason: "ClusterClaimed", - Message: "Cluster claimed", + Type: hivev1.ClusterClaimPendingCondition, + Status: corev1.ConditionUnknown, }, { - Type: hivev1.ClusterRunningCondition, - Status: corev1.ConditionFalse, - Reason: "Resuming", - Message: "Waiting for cluster to be running", + Type: hivev1.ClusterRunningCondition, + Status: corev1.ConditionUnknown, }, }, }, @@ -215,7 +209,7 @@ func TestReconcileClusterClaim(t *testing.T) { expectAssignedClusterDeploymentDeleted: true, }, { - name: "new assignment clears pending condition", + name: "existing assignment clears pending condition", claim: initializedClaimBuilder.Build( testclaim.WithCluster(clusterName), testclaim.WithCondition( @@ -226,7 +220,7 @@ func TestReconcileClusterClaim(t *testing.T) { ), ), cd: cdBuilder.Build( - testcd.WithUnclaimedClusterPoolReference(claimNamespace, "test-pool"), + testcd.WithClusterPoolReference(claimNamespace, "test-pool", claimName), 
testcd.WithCondition(hivev1.ClusterDeploymentCondition{ Type: hivev1.ClusterHibernatingCondition, Status: corev1.ConditionTrue, @@ -324,7 +318,7 @@ func TestReconcileClusterClaim(t *testing.T) { { name: "no RBAC when no subjects", claim: initializedClaimBuilder.Build(testclaim.WithCluster(clusterName), testclaim.WithSubjects(nil)), - cd: cdBuilder.Build(testcd.WithUnclaimedClusterPoolReference(claimNamespace, "test-pool"), + cd: cdBuilder.Build(testcd.WithClusterPoolReference(claimNamespace, "test-pool", claimName), testcd.WithCondition(hivev1.ClusterDeploymentCondition{ Type: hivev1.ClusterHibernatingCondition, Status: corev1.ConditionTrue, @@ -350,7 +344,7 @@ func TestReconcileClusterClaim(t *testing.T) { name: "update existing role", claim: initializedClaimBuilder.Build(testclaim.WithCluster(clusterName)), cd: cdBuilder.Build( - testcd.WithUnclaimedClusterPoolReference(claimNamespace, "test-pool"), + testcd.WithClusterPoolReference(claimNamespace, "test-pool", claimName), testcd.WithCondition(hivev1.ClusterDeploymentCondition{ Type: hivev1.ClusterHibernatingCondition, Status: corev1.ConditionTrue, @@ -384,7 +378,7 @@ func TestReconcileClusterClaim(t *testing.T) { name: "update existing rolebinding", claim: initializedClaimBuilder.Build(testclaim.WithCluster(clusterName)), cd: cdBuilder.Build( - testcd.WithUnclaimedClusterPoolReference(claimNamespace, "test-pool"), + testcd.WithClusterPoolReference(claimNamespace, "test-pool", claimName), testcd.WithCondition(hivev1.ClusterDeploymentCondition{ Type: hivev1.ClusterHibernatingCondition, Status: corev1.ConditionTrue, @@ -415,35 +409,6 @@ func TestReconcileClusterClaim(t *testing.T) { }, }, }, - { - name: "new assignment bring cluster out of hibernation", - claim: initializedClaimBuilder.Build(testclaim.WithCluster(clusterName)), - cd: cdBuilder.Build( - testcd.WithUnclaimedClusterPoolReference(claimNamespace, "test-pool"), - testcd.WithPowerState(hivev1.HibernatingClusterPowerState), - testcd.WithCondition(hivev1.ClusterDeploymentCondition{ - Type: hivev1.ClusterHibernatingCondition, - Status: corev1.ConditionTrue, - }), - ), - expectCompletedClaim: true, - expectRBAC: true, - expectHibernating: false, - expectedConditions: []hivev1.ClusterClaimCondition{ - { - Type: hivev1.ClusterClaimPendingCondition, - Status: corev1.ConditionFalse, - Reason: "ClusterClaimed", - Message: "Cluster claimed", - }, - { - Type: hivev1.ClusterRunningCondition, - Status: corev1.ConditionFalse, - Reason: "Resuming", - Message: "Waiting for cluster to be running", - }, - }, - }, { name: "existing assignment does not change power state", claim: initializedClaimBuilder.Build(testclaim.WithCluster(clusterName)), diff --git a/pkg/controller/clusterpool/clusterpool_controller.go b/pkg/controller/clusterpool/clusterpool_controller.go index 49e34a4dbc0..d6b6b7591a3 100644 --- a/pkg/controller/clusterpool/clusterpool_controller.go +++ b/pkg/controller/clusterpool/clusterpool_controller.go @@ -5,7 +5,6 @@ import ( "fmt" "math" "reflect" - "sort" "github.com/pkg/errors" log "github.com/sirupsen/logrus" @@ -44,6 +43,7 @@ const ( clusterPoolAdminRoleBindingName = "hive-cluster-pool-admin-binding" icSecretDependent = "install config template secret" cdClusterPoolIndex = "spec.clusterpool.namespacedname" + claimClusterPoolIndex = "spec.clusterpoolname" ) var ( @@ -108,6 +108,19 @@ func AddToManager(mgr manager.Manager, r *ReconcileClusterPool, concurrentReconc return err } + // Index ClusterClaims by pool name + if err := mgr.GetFieldIndexer().IndexField(context.TODO(), 
&hivev1.ClusterClaim{}, claimClusterPoolIndex, + func(o client.Object) []string { + claim := o.(*hivev1.ClusterClaim) + if poolName := claim.Spec.ClusterPoolName; poolName != "" { + return []string{poolName} + } + return []string{} + }); err != nil { + log.WithError(err).Error("Error indexing ClusterClaims by ClusterPool") + return err + } + // Watch for changes to ClusterPool err = c.Watch(&source.Kind{Type: &hivev1.ClusterPool{}}, &handler.EnqueueRequestForObject{}) if err != nil { @@ -220,7 +233,7 @@ type ReconcileClusterPool struct { // attempts to reach the desired state if not. func (r *ReconcileClusterPool) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { logger := controllerutils.BuildControllerLogger(ControllerName, "clusterPool", request.NamespacedName) - logger.Infof("reconciling cluster pool") + logger.Info("reconciling cluster pool") recobsrv := hivemetrics.NewReconcileObserver(ControllerName, logger) defer recobsrv.ObserveControllerReconcileTime() @@ -242,7 +255,7 @@ func (r *ReconcileClusterPool) Reconcile(ctx context.Context, request reconcile. newConditions := controllerutils.InitializeClusterPoolConditions(clp.Status.Conditions, clusterPoolConditions) if len(newConditions) > len(clp.Status.Conditions) { clp.Status.Conditions = newConditions - logger.Infof("initializing cluster pool conditions") + logger.Info("initializing cluster pool conditions") if err := r.Status().Update(context.TODO(), clp); err != nil { logger.WithError(err).Log(controllerutils.LogLevel(err), "failed to update cluster pool status") return reconcile.Result{}, err @@ -270,48 +283,22 @@ func (r *ReconcileClusterPool) Reconcile(ctx context.Context, request reconcile. return reconcile.Result{}, nil } - // Find all ClusterDeployments from this pool: - claimedCDs, unClaimedCDs, err := r.getAllClusterDeploymentsForPool(clp, logger) + cds, err := getAllClusterDeploymentsForPool(r.Client, clp, logger) if err != nil { return reconcile.Result{}, err } - var toRemoveClaimedCDs []*hivev1.ClusterDeployment - numberOfDeletingClaimedCDs := 0 - for _, cd := range claimedCDs { - toRemove := controllerutils.IsClaimedClusterMarkedForRemoval(cd) - switch { - case cd.DeletionTimestamp != nil: - numberOfDeletingClaimedCDs++ - case toRemove: - toRemoveClaimedCDs = append(toRemoveClaimedCDs, cd) - } - } - - var installingCDs []*hivev1.ClusterDeployment - var readyCDs []*hivev1.ClusterDeployment - numberOfDeletingCDs := 0 - for _, cd := range unClaimedCDs { - switch { - case cd.DeletionTimestamp != nil: - numberOfDeletingCDs++ - case !cd.Spec.Installed: - installingCDs = append(installingCDs, cd) - default: - readyCDs = append(readyCDs, cd) - } + claims, err := getAllClaimsForPool(r.Client, clp, logger) + if err != nil { + return reconcile.Result{}, err } - logger.WithFields(log.Fields{ - "installing": len(installingCDs), - "deleting": numberOfDeletingCDs, - "total": len(unClaimedCDs), - "ready": len(readyCDs), - }).Debug("found clusters for ClusterPool") + claims.SyncClusterDeploymentAssignments(r.Client, cds, logger) + cds.SyncClaimAssignments(r.Client, claims, logger) origStatus := clp.Status.DeepCopy() - clp.Status.Size = int32(len(installingCDs) + len(readyCDs)) - clp.Status.Ready = int32(len(readyCDs)) + clp.Status.Size = int32(len(cds.Installing()) + len(cds.Assignable())) + clp.Status.Ready = int32(len(cds.Assignable())) if !reflect.DeepEqual(origStatus, &clp.Status) { if err := r.Status().Update(context.Background(), clp); err != nil { 
logger.WithError(err).Log(controllerutils.LogLevel(err), "could not update ClusterPool status") @@ -321,11 +308,13 @@ func (r *ReconcileClusterPool) Reconcile(ctx context.Context, request reconcile. availableCapacity := math.MaxInt32 if clp.Spec.MaxSize != nil { - availableCapacity = int(*clp.Spec.MaxSize) - len(unClaimedCDs) - len(claimedCDs) + availableCapacity = int(*clp.Spec.MaxSize) - cds.Total() + numUnassigned := len(cds.Installing()) + len(cds.Assignable()) + numAssigned := cds.NumAssigned() if availableCapacity <= 0 { logger.WithFields(log.Fields{ - "UnclaimedSize": len(unClaimedCDs), - "ClaimedSize": len(claimedCDs), + "UnclaimedSize": numUnassigned, + "ClaimedSize": numAssigned, "Capacity": *clp.Spec.MaxSize, }).Info("Cannot add more clusters because no capacity available.") } @@ -335,29 +324,24 @@ func (r *ReconcileClusterPool) Reconcile(ctx context.Context, request reconcile. return reconcile.Result{}, err } - pendingClaims, err := r.getAllPendingClusterClaims(clp, logger) - if err != nil { - return reconcile.Result{}, err - } - logger.WithField("count", len(pendingClaims)).Debug("found pending claims for ClusterPool") - // reserveSize is the number of clusters that the pool currently has in reserve - reserveSize := len(installingCDs) + len(readyCDs) - len(pendingClaims) + reserveSize := len(cds.Installing()) + len(cds.Assignable()) - len(claims.Unassigned()) - readyCDs, err = r.assignClustersToClaims(pendingClaims, readyCDs, logger) - if err != nil { + if err := assignClustersToClaims(r.Client, claims, cds, logger); err != nil { + logger.WithError(err).Error("error assigning clusters <=> claims") return reconcile.Result{}, err } availableCurrent := math.MaxInt32 if clp.Spec.MaxConcurrent != nil { - availableCurrent = int(*clp.Spec.MaxConcurrent) - len(installingCDs) - numberOfDeletingCDs - numberOfDeletingClaimedCDs + availableCurrent = int(*clp.Spec.MaxConcurrent) - len(cds.Installing()) - len(cds.Deleting()) if availableCurrent < 0 { availableCurrent = 0 } } // remove clusters that were previously claimed but now not required. + toRemoveClaimedCDs := cds.MarkedForDeletion() toDel := minIntVarible(len(toRemoveClaimedCDs), availableCurrent) for _, cd := range toRemoveClaimedCDs[:toDel] { cdLog := logger.WithField("cluster", cd.Name) @@ -379,7 +363,7 @@ func (r *ReconcileClusterPool) Reconcile(ctx context.Context, request reconcile. // If too many, delete some. case drift > 0: toDel := minIntVarible(drift, availableCurrent) - if err := r.deleteExcessClusters(installingCDs, readyCDs, toDel, logger); err != nil { + if err := r.deleteExcessClusters(cds.Installing(), cds.Assignable(), toDel, logger); err != nil { return reconcile.Result{}, err } // If too few, create new InstallConfig and ClusterDeployment. @@ -648,11 +632,6 @@ func (r *ReconcileClusterPool) deleteExcessClusters( logger.WithField("deletionsNeeded", deletionsNeeded).Info("deleting excess clusters") clustersToDelete := make([]*hivev1.ClusterDeployment, 0, deletionsNeeded) if deletionsNeeded < len(installingClusters) { - // Sort the installing clusters in order by creation timestamp from newest to oldest. This has the effect of - // prioritizing deleting those clusters that have the longest time until they are installed. - sort.Slice(installingClusters, func(i, j int) bool { - return installingClusters[i].CreationTimestamp.After(installingClusters[j].CreationTimestamp.Time) - }) clustersToDelete = installingClusters[:deletionsNeeded] } else { clustersToDelete = append(clustersToDelete, installingClusters...) 
@@ -683,11 +662,11 @@ func (r *ReconcileClusterPool) reconcileDeletedPool(pool *hivev1.ClusterPool, lo if !controllerutils.HasFinalizer(pool, finalizer) { return nil } - _, unClaimedCDs, err := r.getAllClusterDeploymentsForPool(pool, logger) + cds, err := getAllClusterDeploymentsForPool(r.Client, pool, logger) if err != nil { return err } - for _, cd := range unClaimedCDs { + for _, cd := range append(cds.Assignable(), cds.Installing()...) { if cd.DeletionTimestamp != nil { continue } @@ -704,29 +683,6 @@ func (r *ReconcileClusterPool) reconcileDeletedPool(pool *hivev1.ClusterPool, lo return nil } -func (r *ReconcileClusterPool) getAllClusterDeploymentsForPool(pool *hivev1.ClusterPool, logger log.FieldLogger) (claimed, unclaimed []*hivev1.ClusterDeployment, err error) { - cdList := &hivev1.ClusterDeploymentList{} - if err := r.Client.List(context.Background(), cdList, - client.MatchingFields{cdClusterPoolIndex: poolKey(pool.GetNamespace(), pool.GetName())}); err != nil { - logger.WithError(err).Error("error listing ClusterDeployments") - return nil, nil, err - } - for i, cd := range cdList.Items { - poolRef := cd.Spec.ClusterPoolRef - if poolRef == nil || poolRef.Namespace != pool.Namespace || poolRef.PoolName != pool.Name { - // This shouldn't happen IRL, but controller-runtime fake client doesn't support index filters - logger.Errorf("unepectedly got a ClusterDeployment not belonging to this pool; ClusterPoolRef: %v", cd.Spec.ClusterPoolRef) - continue - } - if poolRef.ClaimName != "" { - claimed = append(claimed, &cdList.Items[i]) - } else { - unclaimed = append(unclaimed, &cdList.Items[i]) - } - } - return claimed, unclaimed, nil -} - func poolReference(pool *hivev1.ClusterPool) hivev1.ClusterPoolReference { return hivev1.ClusterPoolReference{ Namespace: pool.Namespace, @@ -902,76 +858,3 @@ func (r *ReconcileClusterPool) createCloudBuilder(pool *hivev1.ClusterPool, logg return nil, errors.New("unsupported platform") } } - -// getAllPendingClusterClaims returns all of the ClusterClaims that are requesting clusters from the specified pool. -// The claims are returned in order of creation time, from oldest to youngest. 
-func (r *ReconcileClusterPool) getAllPendingClusterClaims(pool *hivev1.ClusterPool, logger log.FieldLogger) ([]*hivev1.ClusterClaim, error) { - claimsList := &hivev1.ClusterClaimList{} - if err := r.Client.List(context.Background(), claimsList, client.InNamespace(pool.Namespace)); err != nil { - logger.WithError(err).Error("error listing ClusterClaims") - return nil, err - } - var pendingClaims []*hivev1.ClusterClaim - for i, claim := range claimsList.Items { - // skip claims for other pools - if claim.Spec.ClusterPoolName != pool.Name { - continue - } - // skip claims that have been assigned already - if claim.Spec.Namespace != "" { - continue - } - pendingClaims = append(pendingClaims, &claimsList.Items[i]) - } - sort.Slice( - pendingClaims, - func(i, j int) bool { - return pendingClaims[i].CreationTimestamp.Before(&pendingClaims[j].CreationTimestamp) - }, - ) - return pendingClaims, nil -} - -func (r *ReconcileClusterPool) assignClustersToClaims(claims []*hivev1.ClusterClaim, cds []*hivev1.ClusterDeployment, logger log.FieldLogger) ([]*hivev1.ClusterDeployment, error) { - for _, claim := range claims { - logger := logger.WithField("claim", claim.Name) - var conds []hivev1.ClusterClaimCondition - var statusChanged bool - if len(cds) > 0 { - claim.Spec.Namespace = cds[0].Namespace - cds = cds[1:] - logger.WithField("cluster", claim.Spec.Namespace).Info("assigning cluster to claim") - if err := r.Update(context.Background(), claim); err != nil { - logger.WithError(err).Log(controllerutils.LogLevel(err), "could not assign cluster to claim") - return cds, err - } - conds = controllerutils.SetClusterClaimCondition( - claim.Status.Conditions, - hivev1.ClusterClaimPendingCondition, - corev1.ConditionTrue, - "ClusterAssigned", - "Cluster assigned to ClusterClaim, awaiting claim", - controllerutils.UpdateConditionIfReasonOrMessageChange, - ) - statusChanged = true - } else { - logger.Debug("no clusters ready to assign to claim") - conds, statusChanged = controllerutils.SetClusterClaimConditionWithChangeCheck( - claim.Status.Conditions, - hivev1.ClusterClaimPendingCondition, - corev1.ConditionTrue, - "NoClusters", - "No clusters in pool are ready to be claimed", - controllerutils.UpdateConditionIfReasonOrMessageChange, - ) - } - if statusChanged { - claim.Status.Conditions = conds - if err := r.Status().Update(context.Background(), claim); err != nil { - logger.WithError(err).Log(controllerutils.LogLevel(err), "could not update status of ClusterClaim") - return cds, err - } - } - } - return cds, nil -} diff --git a/pkg/controller/clusterpool/clusterpool_controller_test.go b/pkg/controller/clusterpool/clusterpool_controller_test.go index 33f28405c45..17b7b671be5 100644 --- a/pkg/controller/clusterpool/clusterpool_controller_test.go +++ b/pkg/controller/clusterpool/clusterpool_controller_test.go @@ -75,6 +75,8 @@ func TestReconcileClusterPool(t *testing.T) { ) } + nowish := time.Now() + tests := []struct { name string existing []runtime.Object @@ -91,7 +93,13 @@ func TestReconcileClusterPool(t *testing.T) { expectedMissingDependenciesMessage string expectedAssignedClaims int expectedUnassignedClaims int + expectedAssignedCDs int + expectedRunning int expectedLabels map[string]string // Tested on all clusters, so will not work if your test has pre-existing cds in the pool. + // Map, keyed by claim name, of expected Status.Conditions['Pending'].Reason. + // (The clusterpool controller always sets this condition's Status to True.) + // Not checked if nil. 
+ expectedClaimPendingReasons map[string]string }{ { name: "initialize conditions", @@ -153,6 +161,7 @@ func TestReconcileClusterPool(t *testing.T) { name: "scale up with no more capacity including claimed", existing: []runtime.Object{ initializedPoolBuilder.Build(testcp.WithSize(5), testcp.WithMaxSize(3)), + testclaim.FullBuilder(testNamespace, "test", scheme).Build(testclaim.WithPool(testLeasePoolName)), cdBuilder("c1").Build(testcd.Installed(), testcd.WithClusterPoolReference(testNamespace, testLeasePoolName, "test")), unclaimedCDBuilder("c2").Build(testcd.Installed()), unclaimedCDBuilder("c3").Build(), @@ -160,12 +169,15 @@ func TestReconcileClusterPool(t *testing.T) { expectedTotalClusters: 3, expectedObservedSize: 2, expectedObservedReady: 1, + expectedAssignedClaims: 1, + expectedAssignedCDs: 1, expectedCapacityStatus: corev1.ConditionFalse, }, { name: "scale up with some capacity including claimed", existing: []runtime.Object{ initializedPoolBuilder.Build(testcp.WithSize(5), testcp.WithMaxSize(4)), + testclaim.FullBuilder(testNamespace, "test", scheme).Build(testclaim.WithPool(testLeasePoolName)), cdBuilder("c1").Build(testcd.Installed(), testcd.WithClusterPoolReference(testNamespace, testLeasePoolName, "test")), unclaimedCDBuilder("c2").Build(testcd.Installed()), unclaimedCDBuilder("c3").Build(), @@ -173,6 +185,8 @@ func TestReconcileClusterPool(t *testing.T) { expectedTotalClusters: 4, expectedObservedSize: 2, expectedObservedReady: 1, + expectedAssignedClaims: 1, + expectedAssignedCDs: 1, expectedCapacityStatus: corev1.ConditionTrue, }, { @@ -254,15 +268,18 @@ func TestReconcileClusterPool(t *testing.T) { name: "no scale up with max concurrent and some deleting claimed clusters", existing: []runtime.Object{ initializedPoolBuilder.Build(testcp.WithSize(5), testcp.WithMaxConcurrent(2)), + testclaim.FullBuilder(testNamespace, "test-claim", scheme).Build(testclaim.WithPool(testLeasePoolName)), cdBuilder("c1").GenericOptions(generic.Deleted()).Build( testcd.WithClusterPoolReference(testNamespace, testLeasePoolName, "test-claim"), ), unclaimedCDBuilder("c2").Build(testcd.Installed()), unclaimedCDBuilder("c3").Build(), }, - expectedTotalClusters: 3, - expectedObservedSize: 2, - expectedObservedReady: 1, + expectedTotalClusters: 3, + expectedObservedSize: 2, + expectedObservedReady: 1, + expectedAssignedClaims: 1, + expectedAssignedCDs: 1, }, { name: "scale up with max concurrent and some deleting", @@ -415,6 +432,7 @@ func TestReconcileClusterPool(t *testing.T) { expectedTotalClusters: 4, expectedObservedSize: 3, expectedObservedReady: 2, + expectedAssignedCDs: 1, }, { name: "clusters in different pool are not counted against pool size", @@ -590,11 +608,13 @@ func TestReconcileClusterPool(t *testing.T) { unclaimedCDBuilder("c3").Build(), testclaim.FullBuilder(testNamespace, "test-claim", scheme).Build(testclaim.WithPool(testLeasePoolName)), }, - expectedTotalClusters: 4, - expectedObservedSize: 3, - expectedObservedReady: 2, - expectedAssignedClaims: 1, - expectedUnassignedClaims: 0, + expectedTotalClusters: 4, + expectedObservedSize: 3, + expectedObservedReady: 2, + expectedAssignedClaims: 1, + expectedAssignedCDs: 1, + expectedRunning: 1, + expectedClaimPendingReasons: map[string]string{"test-claim": "ClusterAssigned"}, }, { name: "no ready clusters to assign to claim", @@ -605,11 +625,12 @@ func TestReconcileClusterPool(t *testing.T) { unclaimedCDBuilder("c3").Build(), testclaim.FullBuilder(testNamespace, "test-claim", scheme).Build(testclaim.WithPool(testLeasePoolName)), }, - 
expectedTotalClusters: 4, - expectedObservedSize: 3, - expectedObservedReady: 0, - expectedAssignedClaims: 0, - expectedUnassignedClaims: 1, + expectedTotalClusters: 4, + expectedObservedSize: 3, + expectedObservedReady: 0, + expectedAssignedClaims: 0, + expectedUnassignedClaims: 1, + expectedClaimPendingReasons: map[string]string{"test-claim": "NoClusters"}, }, { name: "assign to multiple claims", @@ -618,15 +639,32 @@ func TestReconcileClusterPool(t *testing.T) { unclaimedCDBuilder("c1").Build(testcd.Installed()), unclaimedCDBuilder("c2").Build(testcd.Installed()), unclaimedCDBuilder("c3").Build(), - testclaim.FullBuilder(testNamespace, "test-claim-1", scheme).Build(testclaim.WithPool(testLeasePoolName)), - testclaim.FullBuilder(testNamespace, "test-claim-2", scheme).Build(testclaim.WithPool(testLeasePoolName)), - testclaim.FullBuilder(testNamespace, "test-claim-3", scheme).Build(testclaim.WithPool(testLeasePoolName)), + // Claims are assigned in FIFO order by creationTimestamp + testclaim.FullBuilder(testNamespace, "test-claim-1", scheme).Build( + testclaim.WithPool(testLeasePoolName), + testclaim.Generic(testgeneric.WithCreationTimestamp(nowish.Add(-time.Second*2))), + ), + testclaim.FullBuilder(testNamespace, "test-claim-2", scheme).Build( + testclaim.WithPool(testLeasePoolName), + testclaim.Generic(testgeneric.WithCreationTimestamp(nowish.Add(-time.Second))), + ), + testclaim.FullBuilder(testNamespace, "test-claim-3", scheme).Build( + testclaim.WithPool(testLeasePoolName), + testclaim.Generic(testgeneric.WithCreationTimestamp(nowish)), + ), }, expectedTotalClusters: 6, expectedObservedSize: 3, expectedObservedReady: 2, expectedAssignedClaims: 2, + expectedAssignedCDs: 2, + expectedRunning: 2, expectedUnassignedClaims: 1, + expectedClaimPendingReasons: map[string]string{ + "test-claim-1": "ClusterAssigned", + "test-claim-2": "ClusterAssigned", + "test-claim-3": "NoClusters", + }, }, { name: "do not assign to claims for other pools", @@ -635,13 +673,22 @@ func TestReconcileClusterPool(t *testing.T) { unclaimedCDBuilder("c1").Build(testcd.Installed()), unclaimedCDBuilder("c2").Build(testcd.Installed()), unclaimedCDBuilder("c3").Build(), - testclaim.FullBuilder(testNamespace, "test-claim", scheme).Build(testclaim.WithPool("other-pool")), + testclaim.FullBuilder(testNamespace, "test-claim", scheme).Build( + testclaim.WithPool("other-pool"), + testclaim.WithCondition(hivev1.ClusterClaimCondition{ + Type: hivev1.ClusterClaimPendingCondition, + Status: corev1.ConditionFalse, + Reason: "ThisShouldNotChange", + Message: "Claim ignored because not in the pool", + }), + ), }, - expectedTotalClusters: 3, - expectedObservedSize: 3, - expectedObservedReady: 2, - expectedAssignedClaims: 0, - expectedUnassignedClaims: 1, + expectedTotalClusters: 3, + expectedObservedSize: 3, + expectedObservedReady: 2, + expectedAssignedClaims: 0, + expectedUnassignedClaims: 1, + expectedClaimPendingReasons: map[string]string{"test-claim": "ThisShouldNotChange"}, }, { name: "do not assign to claims in other namespaces", @@ -650,13 +697,22 @@ func TestReconcileClusterPool(t *testing.T) { unclaimedCDBuilder("c1").Build(testcd.Installed()), unclaimedCDBuilder("c2").Build(testcd.Installed()), unclaimedCDBuilder("c3").Build(), - testclaim.FullBuilder("other-namespace", "test-claim", scheme).Build(testclaim.WithPool(testLeasePoolName)), + testclaim.FullBuilder("other-namespace", "test-claim", scheme).Build( + testclaim.WithPool(testLeasePoolName), + testclaim.WithCondition(hivev1.ClusterClaimCondition{ + Type: 
hivev1.ClusterClaimPendingCondition, + Status: corev1.ConditionFalse, + Reason: "ThisShouldNotChange", + Message: "Claim ignored because not in the namespace", + }), + ), }, - expectedTotalClusters: 3, - expectedObservedSize: 3, - expectedObservedReady: 2, - expectedAssignedClaims: 0, - expectedUnassignedClaims: 1, + expectedTotalClusters: 3, + expectedObservedSize: 3, + expectedObservedReady: 2, + expectedAssignedClaims: 0, + expectedUnassignedClaims: 1, + expectedClaimPendingReasons: map[string]string{"test-claim": "ThisShouldNotChange"}, }, { name: "do not delete previously claimed clusters", @@ -672,6 +728,7 @@ func TestReconcileClusterPool(t *testing.T) { expectedTotalClusters: 4, expectedObservedSize: 3, expectedObservedReady: 2, + expectedAssignedCDs: 1, }, { name: "do not delete previously claimed clusters 2", @@ -689,6 +746,7 @@ func TestReconcileClusterPool(t *testing.T) { expectedTotalClusters: 4, expectedObservedSize: 3, expectedObservedReady: 2, + expectedAssignedCDs: 1, }, { name: "delete previously claimed clusters", @@ -724,6 +782,7 @@ func TestReconcileClusterPool(t *testing.T) { expectedTotalClusters: 4, expectedObservedSize: 3, expectedObservedReady: 1, + expectedAssignedCDs: 1, }, { name: "deleting previously claimed clusters should use max concurrent 2", @@ -741,6 +800,7 @@ func TestReconcileClusterPool(t *testing.T) { expectedTotalClusters: 4, expectedObservedSize: 2, expectedObservedReady: 1, + expectedAssignedCDs: 1, }, { name: "deleting previously claimed clusters should use max concurrent 3", @@ -819,6 +879,7 @@ func TestReconcileClusterPool(t *testing.T) { expectedTotalClusters: 3, expectedObservedSize: 2, expectedObservedReady: 1, + expectedAssignedCDs: 1, expectedDeletedClusters: []string{"c4"}, }, { @@ -883,8 +944,31 @@ func TestReconcileClusterPool(t *testing.T) { expectedTotalClusters: 4, expectedObservedSize: 3, expectedObservedReady: 2, + expectedAssignedCDs: 1, expectedDeletedClusters: []string{"c4"}, }, + { + name: "claims exceed capacity", + existing: []runtime.Object{ + initializedPoolBuilder.Build(testcp.WithSize(2)), + testclaim.FullBuilder(testNamespace, "test-claim1", scheme).Build(testclaim.WithPool(testLeasePoolName)), + testclaim.FullBuilder(testNamespace, "test-claim2", scheme).Build(testclaim.WithPool(testLeasePoolName)), + testclaim.FullBuilder(testNamespace, "test-claim3", scheme).Build(testclaim.WithPool(testLeasePoolName)), + }, + expectedTotalClusters: 5, + // The assignments don't happen until a subsequent reconcile after the CDs are ready + expectedUnassignedClaims: 3, + }, + { + name: "zero size pool", + existing: []runtime.Object{ + initializedPoolBuilder.Build(testcp.WithSize(0)), + testclaim.FullBuilder(testNamespace, "test-claim", scheme).Build(testclaim.WithPool(testLeasePoolName)), + }, + expectedTotalClusters: 1, + // The assignments don't happen until a subsequent reconcile after the CDs are ready + expectedUnassignedClaims: 1, + }, } for _, test := range tests { @@ -940,14 +1024,30 @@ func TestReconcileClusterPool(t *testing.T) { } } + var actualAssignedCDs, actualUnassignedCDs, actualRunning, actualHibernating int for _, cd := range cds.Items { - assert.Equal(t, hivev1.HibernatingClusterPowerState, cd.Spec.PowerState, "expected cluster to be hibernating") + if poolRef := cd.Spec.ClusterPoolRef; poolRef == nil || poolRef.PoolName != testLeasePoolName || poolRef.ClaimName == "" { + actualUnassignedCDs++ + } else { + actualAssignedCDs++ + } + switch powerState := cd.Spec.PowerState; powerState { + case 
hivev1.RunningClusterPowerState: + actualRunning++ + case hivev1.HibernatingClusterPowerState: + actualHibernating++ + } + + if test.expectedLabels != nil { for k, v := range test.expectedLabels { assert.Equal(t, v, cd.Labels[k]) } } } + assert.Equal(t, test.expectedAssignedCDs, actualAssignedCDs, "unexpected number of assigned CDs") + assert.Equal(t, test.expectedTotalClusters-test.expectedAssignedCDs, actualUnassignedCDs, "unexpected number of unassigned CDs") + assert.Equal(t, test.expectedRunning, actualRunning, "unexpected number of running CDs") + assert.Equal(t, test.expectedTotalClusters-test.expectedRunning, actualHibernating, "unexpected number of hibernating CDs") pool := &hivev1.ClusterPool{} err = fakeClient.Get(context.Background(), client.ObjectKey{Namespace: testNamespace, Name: testLeasePoolName}, pool) @@ -987,6 +1087,14 @@ func TestReconcileClusterPool(t *testing.T) { actualAssignedClaims := 0 actualUnassignedClaims := 0 for _, claim := range claims.Items { + if test.expectedClaimPendingReasons != nil { + if reason, ok := test.expectedClaimPendingReasons[claim.Name]; ok { + actualCond := controllerutils.FindClusterClaimCondition(claim.Status.Conditions, hivev1.ClusterClaimPendingCondition) + if assert.NotNil(t, actualCond, "did not find Pending condition on claim %s", claim.Name) { + assert.Equal(t, reason, actualCond.Reason, "wrong reason on Pending condition for claim %s", claim.Name) + } + } + } if claim.Spec.Namespace == "" { actualUnassignedClaims++ } else { diff --git a/pkg/controller/clusterpool/collections.go b/pkg/controller/clusterpool/collections.go new file mode 100644 index 00000000000..136263178dd --- /dev/null +++ b/pkg/controller/clusterpool/collections.go @@ -0,0 +1,486 @@ +package clusterpool + +import ( + "context" + "errors" + "fmt" + "sort" + + log "github.com/sirupsen/logrus" + + corev1 "k8s.io/api/core/v1" + utilerrors "k8s.io/apimachinery/pkg/util/errors" + "sigs.k8s.io/controller-runtime/pkg/client" + + hivev1 "github.com/openshift/hive/apis/hive/v1" + controllerutils "github.com/openshift/hive/pkg/controller/utils" +) + +type claimCollection struct { + // All claims for this pool + byClaimName map[string]*hivev1.ClusterClaim + unassigned []*hivev1.ClusterClaim + // This contains only assigned claims + byCDName map[string]*hivev1.ClusterClaim +} + +// getAllClaimsForPool is the constructor for a claimCollection for all of the +// ClusterClaims that are requesting clusters from the specified pool. 
+func getAllClaimsForPool(c client.Client, pool *hivev1.ClusterPool, logger log.FieldLogger) (*claimCollection, error) { + claimsList := &hivev1.ClusterClaimList{} + if err := c.List( + context.Background(), claimsList, + client.MatchingFields{claimClusterPoolIndex: pool.Name}, + client.InNamespace(pool.Namespace)); err != nil { + logger.WithError(err).Error("error listing ClusterClaims") + return nil, err + } + claimCol := claimCollection{ + byClaimName: make(map[string]*hivev1.ClusterClaim), + unassigned: make([]*hivev1.ClusterClaim, 0), + byCDName: make(map[string]*hivev1.ClusterClaim), + } + for i, claim := range claimsList.Items { + // skip claims for other pools + // This should only happen in unit tests: the fakeclient doesn't support index filters + if claim.Spec.ClusterPoolName != pool.Name { + logger.WithFields(log.Fields{ + "claim": claim.Name, + "claimPool": claim.Spec.ClusterPoolName, + "reconcilePool": pool.Name, + }).Error("unexpectedly got a ClusterClaim not belonging to this pool") + continue + } + ref := &claimsList.Items[i] + claimCol.byClaimName[claim.Name] = ref + if cdName := claim.Spec.Namespace; cdName == "" { + claimCol.unassigned = append(claimCol.unassigned, ref) + } else { + // TODO: Though it should be impossible without manual intervention, if multiple claims + // ref the same CD, whichever comes last in the list will "win". If this is deemed + // important enough to worry about, consider making byCDName a map[string][]*Claim + // instead. + claimCol.byCDName[cdName] = ref + } + } + // Sort assignable claims by creationTimestamp for FIFO behavior. + sort.Slice( + claimCol.unassigned, + func(i, j int) bool { + return claimCol.unassigned[i].CreationTimestamp.Before(&claimCol.unassigned[j].CreationTimestamp) + }, + ) + + logger.WithFields(log.Fields{ + "assignedCount": len(claimCol.byCDName), + "unassignedCount": len(claimCol.unassigned), + }).Debug("found claims for ClusterPool") + + return &claimCol, nil +} + +// ByName returns the named claim from the collection, or nil if no claim by that name exists. +func (c *claimCollection) ByName(claimName string) *hivev1.ClusterClaim { + claim, _ := c.byClaimName[claimName] + return claim +} + +// Unassigned returns a list of claims that are not assigned to clusters yet. The list is sorted by +// age, oldest first. +func (c *claimCollection) Unassigned() []*hivev1.ClusterClaim { + return c.unassigned +} + +// Assign assigns the specified claim to the specified cluster, updating its spec and status on +// the server. Errors updating the spec or status are bubbled up. Returns an error if the claim is +// already assigned (to *any* CD). Does *not* validate that the CD isn't already assigned (to this +// or another claim). +func (claims *claimCollection) Assign(c client.Client, claim *hivev1.ClusterClaim, cd *hivev1.ClusterDeployment) error { + if claim.Spec.Namespace != "" { + return fmt.Errorf("Claim %s is already assigned to %s. 
This is a bug!", claim.Name, claim.Spec.Namespace) + } + for i, claimi := range claims.unassigned { + if claimi.Name == claim.Name { + // Update the spec + claimi.Spec.Namespace = cd.Namespace + if err := c.Update(context.Background(), claimi); err != nil { + return err + } + // Update the status + claimi.Status.Conditions = controllerutils.SetClusterClaimCondition( + claimi.Status.Conditions, + hivev1.ClusterClaimPendingCondition, + corev1.ConditionTrue, + "ClusterAssigned", + "Cluster assigned to ClusterClaim, awaiting claim", + controllerutils.UpdateConditionIfReasonOrMessageChange, + ) + if err := c.Status().Update(context.Background(), claimi); err != nil { + return err + } + // "Move" the claim from the unassigned list to the assigned map. + // (unassigned remains sorted as this is a removal) + claims.byCDName[claim.Spec.Namespace] = claimi + copy(claims.unassigned[i:], claims.unassigned[i+1:]) + claims.unassigned = claims.unassigned[:len(claims.unassigned)-1] + return nil + } + } + return fmt.Errorf("Claim %s is not assigned, but was not found in the unassigned list. This is a bug!", claim.Name) +} + +// SyncClusterDeploymentAssignments makes sure each claim which purports to be assigned has the +// correct CD assigned to it, updating the CD and/or claim on the server as necessary. +func (claims *claimCollection) SyncClusterDeploymentAssignments(c client.Client, cds *cdCollection, logger log.FieldLogger) { + invalidCDs := []string{} + claimsToRemove := []string{} + for cdName, claim := range claims.byCDName { + cd := cds.ByName(cdName) + logger := logger.WithFields(log.Fields{ + "ClusterDeployment": cdName, + "Claim": claim.Name, + }) + if cd == nil { + logger.Error("couldn't sync ClusterDeployment to the claim assigned to it: ClusterDeployment not found") + } else if err := ensureClaimAssignment(c, claim, claims, cd, cds, logger); err != nil { + logger.WithError(err).Error("couldn't sync ClusterDeployment to the claim assigned to it") + } else { + // Happy path + continue + } + // The claim and CD are no good; remove them from further consideration + invalidCDs = append(invalidCDs, cdName) + claimsToRemove = append(claimsToRemove, claim.Name) + } + cds.MakeNotAssignable(invalidCDs...) + claims.Untrack(claimsToRemove...) +} + +// Untrack removes the named claims from the claimCollection, so they are no longer +// - returned via ByName() or Unassigned() +// - available for Assign() or affected by SyncClusterDeploymentAssignments +// Do this to broken claims. +func (c *claimCollection) Untrack(claimNames ...string) { + for _, claimName := range claimNames { + found, ok := c.byClaimName[claimName] + if !ok { + // Count on consistency: if it's not in one collection, it's not in any of them + return + } + delete(c.byClaimName, claimName) + for i, claim := range c.unassigned { + if claim.Name == claimName { + copy(c.unassigned[i:], c.unassigned[i+1:]) + c.unassigned = c.unassigned[:len(c.unassigned)-1] + } + } + if cdName := found.Spec.Namespace; cdName != "" { + // TODO: Should just be able to + // delete(c.byCDName, cdName) + // but it's theoretically possible multiple claims ref the same CD, so double check that + // this is the right one. 
+ if toRemove, ok := c.byCDName[cdName]; ok && toRemove.Name == claimName { + delete(c.byCDName, cdName) + } + } + } +} + +type cdCollection struct { + // Unclaimed installed clusters which belong to this pool and are not (marked for) deleting + assignable []*hivev1.ClusterDeployment + // Unclaimed installing clusters which belong to this pool and are not (marked for) deleting + installing []*hivev1.ClusterDeployment + // Clusters with a DeletionTimestamp. Mutually exclusive with markedForDeletion. + deleting []*hivev1.ClusterDeployment + // Clusters with the ClusterClaimRemoveClusterAnnotation. Mutually exclusive with deleting. + markedForDeletion []*hivev1.ClusterDeployment + // All CDs in this pool + byCDName map[string]*hivev1.ClusterDeployment + // This contains only claimed CDs + byClaimName map[string]*hivev1.ClusterDeployment +} + +// getAllClusterDeploymentsForPool is the constructor for a cdCollection +// comprising all the ClusterDeployments created for the specified ClusterPool. +func getAllClusterDeploymentsForPool(c client.Client, pool *hivev1.ClusterPool, logger log.FieldLogger) (*cdCollection, error) { + cdList := &hivev1.ClusterDeploymentList{} + if err := c.List(context.Background(), cdList, + client.MatchingFields{cdClusterPoolIndex: poolKey(pool.GetNamespace(), pool.GetName())}); err != nil { + logger.WithError(err).Error("error listing ClusterDeployments") + return nil, err + } + cdCol := cdCollection{ + assignable: make([]*hivev1.ClusterDeployment, 0), + installing: make([]*hivev1.ClusterDeployment, 0), + deleting: make([]*hivev1.ClusterDeployment, 0), + byCDName: make(map[string]*hivev1.ClusterDeployment), + byClaimName: make(map[string]*hivev1.ClusterDeployment), + } + for i, cd := range cdList.Items { + poolRef := cd.Spec.ClusterPoolRef + if poolRef == nil || poolRef.Namespace != pool.Namespace || poolRef.PoolName != pool.Name { + // This should only happen in unit tests: the fakeclient doesn't support index filters + logger.WithFields(log.Fields{ + "ClusterDeployment": cd.Name, + "Pool": pool.Name, + "CD.PoolRef": cd.Spec.ClusterPoolRef, + }).Error("unexpectedly got a ClusterDeployment not belonging to this pool") + continue + } + ref := &cdList.Items[i] + cdCol.byCDName[cd.Name] = ref + claimName := poolRef.ClaimName + if ref.DeletionTimestamp != nil { + cdCol.deleting = append(cdCol.deleting, ref) + } else if controllerutils.IsClaimedClusterMarkedForRemoval(ref) { + // Do *not* double count "deleting" and "marked for deletion" + cdCol.markedForDeletion = append(cdCol.markedForDeletion, ref) + } else if claimName == "" { + if cd.Spec.Installed { + cdCol.assignable = append(cdCol.assignable, ref) + } else { + cdCol.installing = append(cdCol.installing, ref) + } + } + // Register all claimed CDs, even if they're deleting/marked + if claimName != "" { + // TODO: Though it should be impossible without manual intervention, if multiple CDs + // ref the same claim, whichever comes last in the list will "win". If this is deemed + // important enough to worry about, consider making byClaimName a map[string][]*CD + // instead. + cdCol.byClaimName[claimName] = ref + } + } + // Sort assignable CDs so we assign them in FIFO order + sort.Slice( + cdCol.assignable, + func(i, j int) bool { + return cdCol.assignable[i].CreationTimestamp.Before(&cdCol.assignable[j].CreationTimestamp) + }, + ) + // Sort installing CDs so we prioritize deleting those that are furthest away from completing + // their installation (prioritizing preserving those that will be assignable the soonest). 
+ sort.Slice( + cdCol.installing, + func(i, j int) bool { + return cdCol.installing[i].CreationTimestamp.After(cdCol.installing[j].CreationTimestamp.Time) + }, + ) + + logger.WithFields(log.Fields{ + "assignable": len(cdCol.assignable), + "claimed": len(cdCol.byClaimName), + "deleting": len(cdCol.deleting), + "installing": len(cdCol.installing), + "unclaimed": len(cdCol.installing) + len(cdCol.assignable), + }).Debug("found clusters for ClusterPool") + return &cdCol, nil +} + +// ByName returns the named ClusterDeployment from the cdCollection, or nil if no CD by that name exists. +func (cds *cdCollection) ByName(cdName string) *hivev1.ClusterDeployment { + cd, _ := cds.byCDName[cdName] + return cd + +} + +// Total returns the total number of ClusterDeployments in the cdCollection. +func (cds *cdCollection) Total() int { + return len(cds.byCDName) +} + +// NumAssigned returns the number of ClusterDeployments assigned to claims. +func (cds *cdCollection) NumAssigned() int { + return len(cds.byClaimName) +} + +// Assignable returns a list of ClusterDeployment refs, sorted by creationTimestamp +func (cds *cdCollection) Assignable() []*hivev1.ClusterDeployment { + return cds.assignable +} + +// Deleting returns the list of ClusterDeployments whose DeletionTimestamp is set. Not to be +// confused with MarkedForDeletion. +func (cds *cdCollection) Deleting() []*hivev1.ClusterDeployment { + return cds.deleting +} + +// MarkedForDeletion returns the list of ClusterDeployments with the +// ClusterClaimRemoveClusterAnnotation. Not to be confused with Deleting: if a CD has its +// DeletionTimestamp set, it is *not* included in MarkedForDeletion. +func (cds *cdCollection) MarkedForDeletion() []*hivev1.ClusterDeployment { + return cds.markedForDeletion +} + +// Installing returns the list of ClusterDeployments in the process of being installed. These are +// not available for claim assignment. +func (cds *cdCollection) Installing() []*hivev1.ClusterDeployment { + return cds.installing +} + +// Assign assigns the specified ClusterDeployment to the specified claim, updating its spec on the +// server. Errors from the update are bubbled up. Returns an error if the CD is already assigned +// (to *any* claim). The CD must be from the Assignable() list; otherwise it is an error. +func (cds *cdCollection) Assign(c client.Client, cd *hivev1.ClusterDeployment, claim *hivev1.ClusterClaim) error { + if cd.Spec.ClusterPoolRef.ClaimName != "" { + return fmt.Errorf("ClusterDeployment %s is already assigned to %s. This is a bug!", cd.Name, cd.Spec.ClusterPoolRef.ClaimName) + } + // "Move" the cd from assignable to byClaimName + for i, cdi := range cds.assignable { + if cdi.Name == cd.Name { + // Update the spec + cdi.Spec.ClusterPoolRef.ClaimName = claim.Name + cdi.Spec.PowerState = hivev1.RunningClusterPowerState + if err := c.Update(context.Background(), cdi); err != nil { + return err + } + // "Move" the CD from the assignable list to the assigned map + cds.byClaimName[cd.Spec.ClusterPoolRef.ClaimName] = cdi + copy(cds.assignable[i:], cds.assignable[i+1:]) + cds.assignable = cds.assignable[:len(cds.assignable)-1] + return nil + } + } + return fmt.Errorf("ClusterDeployment %s is not assigned, but was not found in the assignable list. This is a bug!", cd.Name) +} + +// SyncClaimAssignments makes sure each ClusterDeployment which purports to be assigned has the +// correct claim assigned to it, updating the CD and/or claim on the server as necessary. 
+func (cds *cdCollection) SyncClaimAssignments(c client.Client, claims *claimCollection, logger log.FieldLogger) { + claimsToRemove := []string{} + invalidCDs := []string{} + for claimName, cd := range cds.byClaimName { + logger := logger.WithFields(log.Fields{ + "Claim": claimName, + "ClusterDeployment": cd.Name, + }) + if claim := claims.ByName(claimName); claim == nil { + logger.Error("couldn't sync ClusterClaim to the ClusterDeployment assigned to it: Claim not found") + } else if err := ensureClaimAssignment(c, claim, claims, cd, cds, logger); err != nil { + logger.WithError(err).Error("couldn't sync ClusterClaim to the ClusterDeployment assigned to it") + } else { + // Happy path + continue + } + // The claim and CD are no good; remove them from further consideration + claimsToRemove = append(claimsToRemove, claimName) + invalidCDs = append(invalidCDs, cd.Name) + } + claims.Untrack(claimsToRemove...) + cds.MakeNotAssignable(invalidCDs...) +} + +// MakeNotAssignable idempotently removes the named ClusterDeployments from the assignable list of the +// cdCollection, so they are no longer considered for assignment. They still count against pool +// capacity. Do this to a broken ClusterDeployment -- e.g. one that +// - is assigned to the wrong claim, or a claim that doesn't exist +// - can't be synced with its claim for whatever reason in this iteration (e.g. Update() failure) +func (cds *cdCollection) MakeNotAssignable(cdNames ...string) { + for _, cdName := range cdNames { + for i, cd := range cds.assignable { + if cd.Name == cdName { + copy((cds.assignable)[i:], (cds.assignable)[i+1:]) + cds.assignable = cds.assignable[:len(cds.assignable)-1] + } + } + } +} + +// ensureClaimAssignment returns successfully (nil) when the claim and the cd are both assigned to each other. +// If a non-nil error is returned, it could mean anything else, including: +// - We were given bad parameters +// - We tried to update the claim and/or the cd but failed +func ensureClaimAssignment(c client.Client, claim *hivev1.ClusterClaim, claims *claimCollection, cd *hivev1.ClusterDeployment, cds *cdCollection, logger log.FieldLogger) error { + poolRefInCD := cd.Spec.ClusterPoolRef + + // These should never happen. If they do, it's a programmer error. The caller should only be + // processing CDs in the same pool as the claim, which means ClusterPoolRef is a) populated, + // and b) matches the claim's pool. + if poolRefInCD == nil { + return errors.New("unexpectedly got a ClusterDeployment with no ClusterPoolRef") + } + if poolRefInCD.Namespace != claim.Namespace || poolRefInCD.PoolName != claim.Spec.ClusterPoolName { + return fmt.Errorf("unexpectedly got a ClusterDeployment and a ClusterClaim in different pools. "+ + "ClusterDeployment %s is in pool %s/%s; "+ + "ClusterClaim %s is in pool %s/%s", + cd.Name, poolRefInCD.Namespace, poolRefInCD.PoolName, + claim.Name, claim.Namespace, claim.Spec.ClusterPoolName) + } + + // These should be nearly impossible, but may result from a timing issue (or an explicit update by a user?) 
+ if poolRefInCD.ClaimName != "" && poolRefInCD.ClaimName != claim.Name { + return fmt.Errorf("conflict: ClusterDeployment %s is assigned to ClusterClaim %s (expected %s)", + cd.Name, poolRefInCD.ClaimName, claim.Name) + } + if claim.Spec.Namespace != "" && claim.Spec.Namespace != cd.Namespace { + // The clusterclaim_controller will eventually set the Pending/AssignmentConflict condition on this claim + return fmt.Errorf("conflict: ClusterClaim %s is assigned to ClusterDeployment %s (expected %s)", + claim.Name, claim.Spec.Namespace, cd.Namespace) + } + + logger = logger.WithField("claim", claim.Name).WithField("cluster", cd.Namespace) + logger.Debug("ensuring cluster <=> claim assignment") + + // Update the claim first + if claim.Spec.Namespace == "" { + logger.Info("updating claim to assign cluster") + if err := claims.Assign(c, claim, cd); err != nil { + return err + } + } else { + logger.Debug("claim already assigned") + } + + // Now update the CD + if poolRefInCD.ClaimName == "" { + logger.Info("updating cluster to assign claim") + if err := cds.Assign(c, cd, claim); err != nil { + return err + } + } else { + logger.Debug("cluster already assigned") + } + + logger.Debug("cluster <=> claim assignment ok") + return nil +} + +// assignClustersToClaims iterates over unassigned claims and assignable ClusterDeployments, in order (see +// claimCollection.Unassigned and cdCollection.Assignable), assigning them to each other, stopping when the +// first of the two lists is exhausted. +func assignClustersToClaims(c client.Client, claims *claimCollection, cds *cdCollection, logger log.FieldLogger) error { + // ensureClaimAssignment modifies claims.unassigned and cds.assignable, so make a copy of the lists. + // copy() limits itself to the size of the destination + numToAssign := minIntVarible(len(claims.Unassigned()), len(cds.Assignable())) + claimList := make([]*hivev1.ClusterClaim, numToAssign) + copy(claimList, claims.Unassigned()) + cdList := make([]*hivev1.ClusterDeployment, numToAssign) + copy(cdList, cds.Assignable()) + var errs []error + for i := 0; i < numToAssign; i++ { + if err := ensureClaimAssignment(c, claimList[i], claims, cdList[i], cds, logger); err != nil { + errs = append(errs, err) + } + } + // If any unassigned claims remain, mark their status accordingly + for _, claim := range claims.Unassigned() { + logger := logger.WithField("claim", claim.Name) + logger.Debug("no clusters ready to assign to claim") + if conds, statusChanged := controllerutils.SetClusterClaimConditionWithChangeCheck( + claim.Status.Conditions, + hivev1.ClusterClaimPendingCondition, + corev1.ConditionTrue, + "NoClusters", + "No clusters in pool are ready to be claimed", + controllerutils.UpdateConditionIfReasonOrMessageChange, + ); statusChanged { + claim.Status.Conditions = conds + if err := c.Status().Update(context.Background(), claim); err != nil { + logger.WithError(err).Log(controllerutils.LogLevel(err), "could not update status of ClusterClaim") + errs = append(errs, err) + } + } + } + return utilerrors.NewAggregate(errs) +}