Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions pkg/controller/clusterpool/clusterpool_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,20 @@ func (r *ReconcileClusterPool) Reconcile(ctx context.Context, request reconcile.
// reserveSize is the number of clusters that the pool currently has in reserve
reserveSize := len(cds.Installing()) + len(cds.Assignable()) + len(cds.Broken()) - len(claims.Unassigned())

// excessSize is the number of clusters in excess of the pool's capacity (Size) that we are
// creating to satisfy unassigned claims. For example:
// - In steady state, if Size=3 and we have 5 unassigned claims, excessSize is 2.
// - If Size=3, and we only have 2 clusters in the pool, and we have 6 unassigned claims,
// excessSize is 4: the two existing clusters satisfy two of the claims, leaving four;
// we need three to refill the pool and another four to fulfil the remaining unassigned
// claims.
// - Any time the number of unassigned claims is less than the number of clusters in the
// pool, we will be able to satisfy them from the pool, so excessSize is 0.
excessSize := 0
if reserveSize < 0 {
excessSize = -reserveSize
}

if err := assignClustersToClaims(r.Client, claims, cds, logger); err != nil {
logger.WithError(err).Error("error assigning clusters <=> claims")
return reconcile.Result{}, err
Expand Down Expand Up @@ -400,7 +414,7 @@ func (r *ReconcileClusterPool) Reconcile(ctx context.Context, request reconcile.
metricStaleClusterDeploymentsDeleted.WithLabelValues(clp.Namespace, clp.Name).Inc()
}

if err := r.reconcileRunningClusters(clp, cds, logger); err != nil {
if err := r.reconcileRunningClusters(clp, cds, excessSize, logger); err != nil {
log.WithError(err).Error("error updating hibernating/running state")
return reconcile.Result{}, err
}
Expand All @@ -426,9 +440,13 @@ func (r *ReconcileClusterPool) Reconcile(ctx context.Context, request reconcile.
func (r *ReconcileClusterPool) reconcileRunningClusters(
clp *hivev1.ClusterPool,
cds *cdCollection,
excessCount int,
logger log.FieldLogger,
) error {
runningCount := int(clp.Spec.RunningCount)
// If we're creating excess clusters to satisfy unassigned claims, add that many
// to the runningCount. They'll get snatched up immediately, bringing the number
// of running clusters back down to runningCount once the pool reaches steady state.
runningCount := int(clp.Spec.RunningCount) + excessCount
cdList := append(cds.Assignable(), cds.Installing()...)
// Sort by age, oldest first
sort.Slice(
Expand Down
10 changes: 6 additions & 4 deletions pkg/controller/clusterpool/clusterpool_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1169,6 +1169,8 @@ func TestReconcileClusterPool(t *testing.T) {
expectedTotalClusters: 5,
// The assignments don't happen until a subsequent reconcile after the CDs are ready
expectedUnassignedClaims: 3,
// Even though runningCount is zero, we run enough clusters to fulfill the excess claims
expectedRunning: 3,
},
{
name: "zero size pool",
Expand All @@ -1179,6 +1181,8 @@ func TestReconcileClusterPool(t *testing.T) {
expectedTotalClusters: 1,
// The assignments don't happen until a subsequent reconcile after the CDs are ready
expectedUnassignedClaims: 1,
// Even though runningCount is zero, we run enough clusters to fulfill the excess claims
expectedRunning: 1,
},
{
name: "no CDs match pool version",
Expand Down Expand Up @@ -1364,10 +1368,8 @@ func TestReconcileClusterPool(t *testing.T) {
expectedTotalClusters: 9,
// The four original pool CDs got claimed. Two were already running; we started the other two.
// We create five new CDs to satisfy the pool size plus the additional claim.
// Of those, we start two for runningCount, and hibernate the rest.
// (Intuitively, perhaps the one for the excess claim should start off in running state.
// But that's not how the logic works today. Should it?)
expectedRunning: 6,
// Of those, we start two for runningCount, and one more for the excess claim.
expectedRunning: 7,
expectedAssignedCDs: 4,
expectedAssignedClaims: 4,
expectedUnassignedClaims: 1,
Expand Down