Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🌱 test: improve autoscale tests for to/from zero and running autoscaler in bootstrap cluster #11082

Merged
merged 10 commits into from
Aug 26, 2024
250 changes: 180 additions & 70 deletions test/e2e/autoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,20 @@ type AutoscalerSpecInput struct {
InfrastructureMachineTemplateKind string
InfrastructureMachinePoolTemplateKind string
InfrastructureMachinePoolKind string
InfrastructureAPIGroup string
AutoscalerVersion string

// InstallOnManagementCluster controls whether the autoscaler gets installed on the management cluster or on the workload cluster.
// Depending on the CI environments, there may be no connectivity from the workload to the management cluster.
InstallOnManagementCluster bool

// ScaleToAndFromZero enables tests to scale to and from zero.
// To enable `ScaleToAndFromZero`, one of the following needs to be implemented:
// * provide the relevant annotations on the MachineDeployment or MachinePool, or
// * for MachineDeployments: implement .status.capacity on the InfraMachineTemplate
// * for MachinePools: implement .status.capacity on the InfraMachinePool
ScaleToAndFromZero bool

// PostNamespaceCreated allows injecting a function to be run after the test namespace is created.
// If not specified, this is a no-op.
PostNamespaceCreated func(managementClusterProxy framework.ClusterProxy, workloadClusterNamespace string)
Expand Down Expand Up @@ -115,6 +127,8 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput)
infrastructureProvider = *input.InfrastructureProvider
}

hasMachinePool := input.InfrastructureMachinePoolTemplateKind != ""

clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
ConfigCluster: clusterctl.ConfigClusterInput{
Expand All @@ -137,6 +151,7 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput)
}, clusterResources)

Expect(clusterResources.Cluster.Spec.Topology).NotTo(BeNil(), "Autoscaler test expected a Classy Cluster")

// Ensure the MachineDeploymentTopology has the autoscaler annotations.
mdTopology := clusterResources.Cluster.Spec.Topology.Workers.MachineDeployments[0]
Expect(mdTopology.Metadata.Annotations).NotTo(BeNil(), "MachineDeployment is expected to have autoscaler annotations")
Expand All @@ -145,21 +160,27 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput)
mdNodeGroupMaxSize, ok := mdTopology.Metadata.Annotations[clusterv1.AutoscalerMaxSizeAnnotation]
Expect(ok).To(BeTrue(), "MachineDeploymentTopology %s does not have the %q autoscaler annotation", mdTopology.Name, clusterv1.AutoscalerMaxSizeAnnotation)

// Ensure the MachinePoolTopology does NOT have the autoscaler annotations so we can test MachineDeployments first.
mpTopology := clusterResources.Cluster.Spec.Topology.Workers.MachinePools[0]
if mpTopology.Metadata.Annotations != nil {
_, ok = mpTopology.Metadata.Annotations[clusterv1.AutoscalerMinSizeAnnotation]
Expect(ok).To(BeFalse(), "MachinePoolTopology %s does have the %q autoscaler annotation", mpTopology.Name, clusterv1.AutoscalerMinSizeAnnotation)
_, ok = mpTopology.Metadata.Annotations[clusterv1.AutoscalerMaxSizeAnnotation]
Expect(ok).To(BeFalse(), "MachinePoolTopology %s does have the %q autoscaler annotation", mpTopology.Name, clusterv1.AutoscalerMaxSizeAnnotation)
if hasMachinePool {
// Ensure the MachinePoolTopology does NOT have the autoscaler annotations so we can test MachineDeployments first.
mpTopology := clusterResources.Cluster.Spec.Topology.Workers.MachinePools[0]
if mpTopology.Metadata.Annotations != nil {
_, ok = mpTopology.Metadata.Annotations[clusterv1.AutoscalerMinSizeAnnotation]
Expect(ok).To(BeFalse(), "MachinePoolTopology %s does have the %q autoscaler annotation", mpTopology.Name, clusterv1.AutoscalerMinSizeAnnotation)
_, ok = mpTopology.Metadata.Annotations[clusterv1.AutoscalerMaxSizeAnnotation]
Expect(ok).To(BeFalse(), "MachinePoolTopology %s does have the %q autoscaler annotation", mpTopology.Name, clusterv1.AutoscalerMaxSizeAnnotation)
}
}

// Get a ClusterProxy so we can interact with the workload cluster
workloadClusterProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, clusterResources.Cluster.Namespace, clusterResources.Cluster.Name)
mdOriginalReplicas := *clusterResources.MachineDeployments[0].Spec.Replicas
Expect(strconv.Itoa(int(mdOriginalReplicas))).To(Equal(mdNodeGroupMinSize), "MachineDeployment should have replicas as defined in %s", clusterv1.AutoscalerMinSizeAnnotation)
mpOriginalReplicas := *clusterResources.MachinePools[0].Spec.Replicas
Expect(int(mpOriginalReplicas)).To(Equal(1), "MachinePool should default to 1 replica via the MachinePool webhook")

var mpOriginalReplicas int32
if hasMachinePool {
mpOriginalReplicas = *clusterResources.MachinePools[0].Spec.Replicas
Expect(int(mpOriginalReplicas)).To(Equal(1), "MachinePool should default to 1 replica via the MachinePool webhook")
}

By("Installing the autoscaler on the workload cluster")
autoscalerWorkloadYAMLPath := input.E2EConfig.GetVariable(AutoscalerWorkloadYAMLPath)
Expand All @@ -168,11 +189,13 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput)
InfrastructureMachineTemplateKind: input.InfrastructureMachineTemplateKind,
InfrastructureMachinePoolTemplateKind: input.InfrastructureMachinePoolTemplateKind,
InfrastructureMachinePoolKind: input.InfrastructureMachinePoolKind,
InfrastructureAPIGroup: input.InfrastructureAPIGroup,
WorkloadYamlPath: autoscalerWorkloadYAMLPath,
ManagementClusterProxy: input.BootstrapClusterProxy,
WorkloadClusterProxy: workloadClusterProxy,
Cluster: clusterResources.Cluster,
AutoscalerVersion: input.AutoscalerVersion,
AutoscalerOnManagementCluster: input.InstallOnManagementCluster,
}, input.E2EConfig.GetIntervals(specName, "wait-controllers")...)

By("Creating workload that forces the system to scale up")
Expand Down Expand Up @@ -226,6 +249,46 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput)
WaitForMachineDeployment: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
})

if input.ScaleToAndFromZero {
By("Enabling autoscaler for the MachineDeployment to zero")
// Enable autoscaler on the MachineDeployment.
framework.EnableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.EnableAutoscalerForMachineDeploymentTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
NodeGroupMinSize: "0",
NodeGroupMaxSize: mdNodeGroupMaxSize,
WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
})

By("Scaling the MachineDeployment scale up deployment to zero")
framework.ScaleScaleUpDeploymentAndWait(ctx, framework.ScaleScaleUpDeploymentAndWaitInput{
ClusterProxy: workloadClusterProxy,
Replicas: mpOriginalReplicas + 0,
sbueringer marked this conversation as resolved.
Show resolved Hide resolved
}, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...)

By("Checking the MachineDeployment finished scaling down to zero")
framework.AssertMachineDeploymentReplicas(ctx, framework.AssertMachineDeploymentReplicasInput{
Getter: input.BootstrapClusterProxy.GetClient(),
MachineDeployment: clusterResources.MachineDeployments[0],
Replicas: 0,
WaitForMachineDeployment: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
})

By("Scaling the MachineDeployment scale up deployment to 1")
framework.ScaleScaleUpDeploymentAndWait(ctx, framework.ScaleScaleUpDeploymentAndWaitInput{
ClusterProxy: workloadClusterProxy,
Replicas: mpOriginalReplicas + 1,
sbueringer marked this conversation as resolved.
Show resolved Hide resolved
}, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...)

By("Checking the MachineDeployment finished scaling up")
framework.AssertMachineDeploymentReplicas(ctx, framework.AssertMachineDeploymentReplicasInput{
Getter: input.BootstrapClusterProxy.GetClient(),
MachineDeployment: clusterResources.MachineDeployments[0],
Replicas: 1,
WaitForMachineDeployment: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
})
}

By("Disabling the autoscaler for MachineDeployments to test MachinePools")
framework.DisableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.DisableAutoscalerForMachineDeploymentTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Expand All @@ -239,67 +302,114 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput)
WaitForDelete: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
})

By("Enabling autoscaler for the MachinePool")
// Enable autoscaler on the MachinePool.
framework.EnableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.EnableAutoscalerForMachinePoolTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
NodeGroupMinSize: mpNodeGroupMinSize,
NodeGroupMaxSize: mpNodeGroupMaxSize,
WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
})

By("Creating workload that forces the system to scale up")
framework.AddScaleUpDeploymentAndWait(ctx, framework.AddScaleUpDeploymentAndWaitInput{
ClusterProxy: workloadClusterProxy,
}, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...)

By("Checking the MachinePool is scaled up")
mpScaledUpReplicas := mpOriginalReplicas + 1
framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{
Getter: input.BootstrapClusterProxy.GetClient(),
MachinePool: clusterResources.MachinePools[0],
Replicas: mpScaledUpReplicas,
WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
})

By("Disabling the autoscaler")
framework.DisableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.DisableAutoscalerForMachinePoolTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
WaitForAnnotationsToBeDropped: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
})

By("Checking we can manually scale up the MachinePool")
// Scale up the MachinePool. Since autoscaler is disabled we should be able to do this.
mpExcessReplicas := mpScaledUpReplicas + 1
framework.ScaleMachinePoolTopologyAndWait(ctx, framework.ScaleMachinePoolTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
Replicas: mpExcessReplicas,
WaitForMachinePools: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
Getter: input.BootstrapClusterProxy.GetClient(),
})

By("Checking enabling autoscaler will scale down the MachinePool to correct size")
// Enable autoscaler on the MachinePool.
framework.EnableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.EnableAutoscalerForMachinePoolTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
NodeGroupMinSize: mpNodeGroupMinSize,
NodeGroupMaxSize: mpNodeGroupMaxSize,
WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
})

By("Checking the MachinePool is scaled down")
// Since we scaled up the MachinePool manually and the workload has not changed auto scaler
// should detect that there are unneeded nodes and scale down the MachinePool.
framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{
Getter: input.BootstrapClusterProxy.GetClient(),
MachinePool: clusterResources.MachinePools[0],
Replicas: mpScaledUpReplicas,
WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
})
if hasMachinePool {
By("Enabling autoscaler for the MachinePool")
// Enable autoscaler on the MachinePool.
framework.EnableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.EnableAutoscalerForMachinePoolTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
NodeGroupMinSize: mpNodeGroupMinSize,
NodeGroupMaxSize: mpNodeGroupMaxSize,
WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
})

By("Creating workload that forces the system to scale up")
framework.AddScaleUpDeploymentAndWait(ctx, framework.AddScaleUpDeploymentAndWaitInput{
ClusterProxy: workloadClusterProxy,
}, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...)

By("Checking the MachinePool is scaled up")
mpScaledUpReplicas := mpOriginalReplicas + 1
framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{
Getter: input.BootstrapClusterProxy.GetClient(),
MachinePool: clusterResources.MachinePools[0],
Replicas: mpScaledUpReplicas,
WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
})

By("Disabling the autoscaler")
framework.DisableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.DisableAutoscalerForMachinePoolTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
WaitForAnnotationsToBeDropped: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
})

By("Checking we can manually scale up the MachinePool")
// Scale up the MachinePool. Since autoscaler is disabled we should be able to do this.
mpExcessReplicas := mpScaledUpReplicas + 1
framework.ScaleMachinePoolTopologyAndWait(ctx, framework.ScaleMachinePoolTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
Replicas: mpExcessReplicas,
WaitForMachinePools: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
Getter: input.BootstrapClusterProxy.GetClient(),
})

By("Checking enabling autoscaler will scale down the MachinePool to correct size")
// Enable autoscaler on the MachinePool.
framework.EnableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.EnableAutoscalerForMachinePoolTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
NodeGroupMinSize: mpNodeGroupMinSize,
NodeGroupMaxSize: mpNodeGroupMaxSize,
WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
})

By("Checking the MachinePool is scaled down")
// Since we scaled up the MachinePool manually and the workload has not changed auto scaler
// should detect that there are unneeded nodes and scale down the MachinePool.
framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{
Getter: input.BootstrapClusterProxy.GetClient(),
MachinePool: clusterResources.MachinePools[0],
Replicas: mpScaledUpReplicas,
WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
})

if input.ScaleToAndFromZero {
chrischdi marked this conversation as resolved.
Show resolved Hide resolved
By("Enabling autoscaler for the MachinePool to zero")
// Enable autoscaler on the MachinePool.
framework.EnableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.EnableAutoscalerForMachinePoolTopologyAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
Cluster: clusterResources.Cluster,
NodeGroupMinSize: "0",
NodeGroupMaxSize: mpNodeGroupMaxSize,
WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"),
})

// We can safely assume that mdReplicas is 1.
var mdReplicas int32 = 1

By("Scaling the MachinePool scale up deployment to 1")
framework.ScaleScaleUpDeploymentAndWait(ctx, framework.ScaleScaleUpDeploymentAndWaitInput{
ClusterProxy: workloadClusterProxy,
// Set replicas to 1, because we still have 1 Machine from MachineDeployments.
Replicas: 1,
sbueringer marked this conversation as resolved.
Show resolved Hide resolved
}, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...)

By("Checking the MachinePool finished scaling down to zero")
framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{
Getter: input.BootstrapClusterProxy.GetClient(),
MachinePool: clusterResources.MachinePools[0],
Replicas: mdReplicas,
sbueringer marked this conversation as resolved.
Show resolved Hide resolved
WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
})

By("Scaling the MachinePool scale up deployment to 2")
framework.ScaleScaleUpDeploymentAndWait(ctx, framework.ScaleScaleUpDeploymentAndWaitInput{
ClusterProxy: workloadClusterProxy,
// Set replicas to 2, because we still have 1 Machine from MachineDeployments.
Replicas: mdReplicas + 1,
}, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...)

By("Checking the MachineDeployment finished scaling up")
framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{
Getter: input.BootstrapClusterProxy.GetClient(),
MachinePool: clusterResources.MachinePools[0],
Replicas: 1,
WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-controllers"),
})
}
}

By("PASSED!")
})
Expand Down
Loading
Loading