diff --git a/Makefile b/Makefile
index 93ef07cef..d4dba776a 100644
--- a/Makefile
+++ b/Makefile
@@ -33,7 +33,7 @@ else
 		-e "GO111MODULE=$(GO111MODULE)" \
 		-e "GOFLAGS=$(GOFLAGS)" \
 		-e "GOPROXY=$(GOPROXY)" \
-		openshift/origin-release:golang-1.18
+		registry.ci.openshift.org/openshift/release:golang-1.18
 	IMAGE_BUILD_CMD = $(ENGINE) build
 endif
 
diff --git a/pkg/autoscaler/autoscaler.go b/pkg/autoscaler/autoscaler.go
index 1afe68bc2..6f8266fca 100644
--- a/pkg/autoscaler/autoscaler.go
+++ b/pkg/autoscaler/autoscaler.go
@@ -46,7 +46,7 @@ func clusterAutoscalerResource(maxNodesTotal int) *caov1.ClusterAutoscaler {
 	// and that has high least common multiple to avoid a case
 	// when a node is considered to be empty even if there are
 	// pods already scheduled and running on the node.
-	unneededTimeString := "23s"
+	unneededTimeString := "60s"
 	return &caov1.ClusterAutoscaler{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: "default",
@@ -293,8 +293,9 @@ var _ = Describe("[Feature:Machines] Autoscaler should", func() {
 			Expect(client.Create(ctx, asr)).Should(Succeed())
 			cleanupObjects[asr.GetName()] = asr
 
-			By(fmt.Sprintf("Creating scale-out workload: jobs: %v, memory: %s", expectedReplicas, workloadMemRequest.String()))
-			workload := framework.NewWorkLoad(expectedReplicas, workloadMemRequest, workloadJobName, autoscalingTestLabel, targetedNodeLabel, "")
+			uniqueJobName := fmt.Sprintf("%s-scale-from-zero", workloadJobName)
+			By(fmt.Sprintf("Creating scale-out workload %s: jobs: %v, memory: %s", uniqueJobName, expectedReplicas, workloadMemRequest.String()))
+			workload := framework.NewWorkLoad(expectedReplicas, workloadMemRequest, uniqueJobName, autoscalingTestLabel, targetedNodeLabel, "")
 			cleanupObjects[workload.GetName()] = workload
 			Expect(client.Create(ctx, workload)).Should(Succeed())
 
@@ -353,9 +354,10 @@ var _ = Describe("[Feature:Machines] Autoscaler should", func() {
 			}
 
 			jobReplicas := expectedReplicas * int32(2)
-			By(fmt.Sprintf("Creating scale-out workload: jobs: %v, memory: %s",
-				jobReplicas, workloadMemRequest.String()))
-			workload := framework.NewWorkLoad(jobReplicas, workloadMemRequest, workloadJobName, autoscalingTestLabel, targetedNodeLabel, "")
+			uniqueJobName := fmt.Sprintf("%s-cleanup-after-scale-down", workloadJobName)
+			By(fmt.Sprintf("Creating scale-out workload %s: jobs: %v, memory: %s",
+				uniqueJobName, jobReplicas, workloadMemRequest.String()))
+			workload := framework.NewWorkLoad(jobReplicas, workloadMemRequest, uniqueJobName, autoscalingTestLabel, targetedNodeLabel, "")
 			cleanupObjects[workload.GetName()] = workload
 			Expect(client.Create(ctx, workload)).Should(Succeed())
 
@@ -506,17 +508,21 @@ var _ = Describe("[Feature:Machines] Autoscaler should", func() {
 			// to the maximum MachineSet size this will create enough demand to
 			// grow the cluster to maximum size.
 			jobReplicas := maxMachineSetReplicas
-			By(fmt.Sprintf("Creating scale-out workload: jobs: %v, memory: %s",
-				jobReplicas, workloadMemRequest.String()))
-			workload := framework.NewWorkLoad(jobReplicas, workloadMemRequest, workloadJobName, autoscalingTestLabel, targetedNodeLabel, "")
+			uniqueJobName := fmt.Sprintf("%s-scale-to-maxnodestotal", workloadJobName)
+			By(fmt.Sprintf("Creating scale-out workload %s: jobs: %v, memory: %s",
+				uniqueJobName, jobReplicas, workloadMemRequest.String()))
+			workload := framework.NewWorkLoad(jobReplicas, workloadMemRequest, uniqueJobName, autoscalingTestLabel, targetedNodeLabel, "")
 			cleanupObjects[workload.GetName()] = workload
 			Expect(client.Create(ctx, workload)).Should(Succeed())
 
 			// At this point the autoscaler should be growing the cluster, we
 			// wait until the cluster has grown to reach MaxNodesTotal size.
+			// Because the autoscaler will ignore nodes that are not ready or unschedulable,
+			// we need to check against the number of ready nodes in the cluster since
+			// previous tests might have left nodes that are not ready or unschedulable.
 			By(fmt.Sprintf("Waiting for cluster to scale up to %d nodes", caMaxNodesTotal))
 			Eventually(func() (bool, error) {
-				nodes, err := framework.GetNodes(client)
+				nodes, err := framework.GetReadyAndSchedulableNodes(client)
 				return len(nodes) == caMaxNodesTotal, err
 			}, framework.WaitLong, pollingInterval).Should(BeTrue())
 
@@ -528,9 +534,12 @@ var _ = Describe("[Feature:Machines] Autoscaler should", func() {
 
 			// Now that the cluster has reached maximum size, we want to ensure
 			// that it doesn't try to grow larger.
+			// Because the autoscaler will ignore nodes that are not ready or unschedulable,
+			// we need to check against the number of ready nodes in the cluster since
+			// previous tests might have left nodes that are not ready or unschedulable.
 			By("Watching Cluster node count to ensure it remains consistent")
 			Consistently(func() (bool, error) {
-				nodes, err := framework.GetNodes(client)
+				nodes, err := framework.GetReadyAndSchedulableNodes(client)
 				return len(nodes) == caMaxNodesTotal, err
 			}, framework.WaitShort, pollingInterval).Should(BeTrue())
 
@@ -599,9 +608,10 @@ var _ = Describe("[Feature:Machines] Autoscaler should", func() {
 			// expand its size by 2 nodes. the cluster autoscaler should
 			// place 1 node in each of the 2 MachineSets created.
 			jobReplicas := int32(4)
-			By(fmt.Sprintf("Creating scale-out workload: jobs: %v, memory: %s",
-				jobReplicas, workloadMemRequest.String()))
-			workload := framework.NewWorkLoad(jobReplicas, workloadMemRequest, workloadJobName, autoscalingTestLabel, targetedNodeLabel, "")
+			uniqueJobName := fmt.Sprintf("%s-balance-nodegroups", workloadJobName)
+			By(fmt.Sprintf("Creating scale-out workload %s: jobs: %v, memory: %s",
+				uniqueJobName, jobReplicas, workloadMemRequest.String()))
+			workload := framework.NewWorkLoad(jobReplicas, workloadMemRequest, uniqueJobName, autoscalingTestLabel, targetedNodeLabel, "")
 			cleanupObjects[workload.GetName()] = workload
 			Expect(client.Create(ctx, workload)).Should(Succeed())
 
diff --git a/pkg/framework/framework.go b/pkg/framework/framework.go
index b277ad695..cee9f4fc1 100644
--- a/pkg/framework/framework.go
+++ b/pkg/framework/framework.go
@@ -29,12 +29,13 @@ const (
 	RetryMedium = 5 * time.Second
 	// DefaultMachineSetReplicas is the default number of replicas of a machineset
 	// if MachineSet.Spec.Replicas field is set to nil
-	DefaultMachineSetReplicas = 0
-	MachinePhaseRunning       = "Running"
-	MachinePhaseFailed        = "Failed"
-	MachineRoleLabel          = "machine.openshift.io/cluster-api-machine-role"
-	MachineTypeLabel          = "machine.openshift.io/cluster-api-machine-type"
-	MachineAnnotationKey      = "machine.openshift.io/machine"
+	DefaultMachineSetReplicas  = 0
+	MachinePhaseRunning        = "Running"
+	MachinePhaseFailed         = "Failed"
+	MachineRoleLabel           = "machine.openshift.io/cluster-api-machine-role"
+	MachineTypeLabel           = "machine.openshift.io/cluster-api-machine-type"
+	MachineAnnotationKey       = "machine.openshift.io/machine"
+	ClusterAPIActuatorPkgTaint = "cluster-api-actuator-pkg"
 )
 
 var (
diff --git a/pkg/framework/jobs.go b/pkg/framework/jobs.go
index 6d481323f..486df640c 100644
--- a/pkg/framework/jobs.go
+++ b/pkg/framework/jobs.go
@@ -41,6 +41,10 @@ func NewWorkLoad(njobs int32, memoryRequest resource.Quantity, workloadJobName s
 						Key:      "kubemark",
 						Operator: corev1.TolerationOpExists,
 					},
+					{
+						Key:    ClusterAPIActuatorPkgTaint,
+						Effect: corev1.TaintEffectPreferNoSchedule,
+					},
 				},
 			},
 		},
diff --git a/pkg/framework/machinesets.go b/pkg/framework/machinesets.go
index 32e012701..7c73eed19 100644
--- a/pkg/framework/machinesets.go
+++ b/pkg/framework/machinesets.go
@@ -8,6 +8,7 @@ import (
 	. "github.com/onsi/gomega"
"github.com/onsi/gomega" machinev1 "github.com/openshift/api/machine/v1beta1" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" @@ -28,6 +29,7 @@ type MachineSetParams struct { Name string Replicas int32 Labels map[string]string + Taints []corev1.Taint ProviderSpec *machinev1.ProviderSpec } @@ -59,6 +61,12 @@ func BuildMachineSetParams(client runtimeclient.Client, replicas int) MachineSet "e2e.openshift.io": uid.String(), ClusterKey: clusterName, }, + Taints: []corev1.Taint{ + corev1.Taint{ + Key: ClusterAPIActuatorPkgTaint, + Effect: corev1.TaintEffectPreferNoSchedule, + }, + }, } } @@ -87,6 +95,7 @@ func CreateMachineSet(c client.Client, params MachineSetParams) (*machinev1.Mach Labels: params.Labels, }, ProviderSpec: *params.ProviderSpec, + Taints: params.Taints, }, }, Replicas: pointer.Int32Ptr(params.Replicas), diff --git a/pkg/framework/nodes.go b/pkg/framework/nodes.go index a6ffd60ec..40914f3b0 100644 --- a/pkg/framework/nodes.go +++ b/pkg/framework/nodes.go @@ -26,7 +26,7 @@ func AddNodeCondition(c client.Client, node *corev1.Node, cond corev1.NodeCondit return c.Status().Patch(context.Background(), nodeCopy, client.MergeFrom(node)) } -// FilterReadyNodes fileter the list of nodes and returns the list with ready nodes +// FilterReadyNodes filters the list of nodes and returns a list with ready nodes func FilterReadyNodes(nodes []corev1.Node) []corev1.Node { var readyNodes []corev1.Node for _, n := range nodes { @@ -37,6 +37,17 @@ func FilterReadyNodes(nodes []corev1.Node) []corev1.Node { return readyNodes } +// FilterSchedulableNodes filters the list of nodes and returns a list with schedulable nodes +func FilterSchedulableNodes(nodes []corev1.Node) []corev1.Node { + var schedulableNodes []corev1.Node + for _, n := range nodes { + if IsNodeSchedulable(&n) { + schedulableNodes = append(schedulableNodes, n) + } + } + return schedulableNodes +} + // GetNodes gets a list of nodes from a running cluster // Optionaly, labels may be used to constrain listed nodes. func GetNodes(c client.Client, selectors ...*metav1.LabelSelector) ([]corev1.Node, error) { @@ -102,6 +113,19 @@ func GetNodeForMachine(c client.Client, m *machinev1.Machine) (*corev1.Node, err return node, nil } +// GetReadyAndSchedulableNodes returns all the nodes that have the Ready condition and can schedule workloads +func GetReadyAndSchedulableNodes(c client.Client) ([]corev1.Node, error) { + nodes, err := GetNodes(c) + if err != nil { + return nodes, err + } + + nodes = FilterReadyNodes(nodes) + nodes = FilterSchedulableNodes(nodes) + + return nodes, nil +} + // GetWorkerNodes returns all nodes with the nodeWorkerRoleLabel label func GetWorkerNodes(c client.Client) ([]corev1.Node, error) { workerNodes := &corev1.NodeList{} @@ -127,6 +151,14 @@ func IsNodeReady(node *corev1.Node) bool { return false } +// IsNodeSchedulable returns true is the given node can schedule workloads. +func IsNodeSchedulable(node *corev1.Node) bool { + if node.Spec.Unschedulable { + return false + } + return true +} + // NodesAreReady returns true if an array of nodes are all ready func NodesAreReady(nodes []*corev1.Node) bool { // All nodes needs to be ready