Merged
2 changes: 1 addition & 1 deletion Makefile
@@ -33,7 +33,7 @@ else
-e "GO111MODULE=$(GO111MODULE)" \
-e "GOFLAGS=$(GOFLAGS)" \
-e "GOPROXY=$(GOPROXY)" \
-		openshift/origin-release:golang-1.18
+		registry.ci.openshift.org/openshift/release:golang-1.18
IMAGE_BUILD_CMD = $(ENGINE) build
endif

38 changes: 24 additions & 14 deletions pkg/autoscaler/autoscaler.go
@@ -46,7 +46,7 @@ func clusterAutoscalerResource(maxNodesTotal int) *caov1.ClusterAutoscaler {
// and that has high least common multiple to avoid a case
// when a node is considered to be empty even if there are
// pods already scheduled and running on the node.
-	unneededTimeString := "23s"
+	unneededTimeString := "60s"
return &caov1.ClusterAutoscaler{
ObjectMeta: metav1.ObjectMeta{
Name: "default",
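For orientation, here is a minimal sketch of where that string lands; the ScaleDown and ResourceLimits field names are assumptions based on the cluster-autoscaler-operator v1 (caov1) API, not copied from this PR. The autoscaler must see a node as unneeded for this whole duration before scaling it down, so the longer 60s window keeps nodes whose pods are scheduled but not yet running from being reclaimed.

	// Hedged sketch of the resource this helper builds; field names are
	// assumed from the caov1 API rather than shown in this diff.
	unneededTime := "60s"
	maxNodes := int32(maxNodesTotal)
	autoscaler := &caov1.ClusterAutoscaler{
		ObjectMeta: metav1.ObjectMeta{Name: "default"},
		Spec: caov1.ClusterAutoscalerSpec{
			ResourceLimits: &caov1.ResourceLimits{MaxNodesTotal: &maxNodes},
			ScaleDown: &caov1.ScaleDownConfig{
				Enabled:      true,
				UnneededTime: &unneededTime, // node must be unneeded this long before scale-down
			},
		},
	}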
@@ -293,8 +293,9 @@ var _ = Describe("[Feature:Machines] Autoscaler should", func() {
Expect(client.Create(ctx, asr)).Should(Succeed())
cleanupObjects[asr.GetName()] = asr

-			By(fmt.Sprintf("Creating scale-out workload: jobs: %v, memory: %s", expectedReplicas, workloadMemRequest.String()))
-			workload := framework.NewWorkLoad(expectedReplicas, workloadMemRequest, workloadJobName, autoscalingTestLabel, targetedNodeLabel, "")
+			uniqueJobName := fmt.Sprintf("%s-scale-from-zero", workloadJobName)
+			By(fmt.Sprintf("Creating scale-out workload %s: jobs: %v, memory: %s", uniqueJobName, expectedReplicas, workloadMemRequest.String()))
+			workload := framework.NewWorkLoad(expectedReplicas, workloadMemRequest, uniqueJobName, autoscalingTestLabel, targetedNodeLabel, "")
cleanupObjects[workload.GetName()] = workload
Expect(client.Create(ctx, workload)).Should(Succeed())

@@ -353,9 +354,10 @@ var _ = Describe("[Feature:Machines] Autoscaler should", func() {
}

jobReplicas := expectedReplicas * int32(2)
-			By(fmt.Sprintf("Creating scale-out workload: jobs: %v, memory: %s",
-				jobReplicas, workloadMemRequest.String()))
-			workload := framework.NewWorkLoad(jobReplicas, workloadMemRequest, workloadJobName, autoscalingTestLabel, targetedNodeLabel, "")
+			uniqueJobName := fmt.Sprintf("%s-cleanup-after-scale-down", workloadJobName)
+			By(fmt.Sprintf("Creating scale-out workload %s: jobs: %v, memory: %s",
+				uniqueJobName, jobReplicas, workloadMemRequest.String()))
+			workload := framework.NewWorkLoad(jobReplicas, workloadMemRequest, uniqueJobName, autoscalingTestLabel, targetedNodeLabel, "")
cleanupObjects[workload.GetName()] = workload
Expect(client.Create(ctx, workload)).Should(Succeed())

@@ -506,17 +508,21 @@ var _ = Describe("[Feature:Machines] Autoscaler should", func() {
// to the maximum MachineSet size this will create enough demand to
// grow the cluster to maximum size.
jobReplicas := maxMachineSetReplicas
-			By(fmt.Sprintf("Creating scale-out workload: jobs: %v, memory: %s",
-				jobReplicas, workloadMemRequest.String()))
-			workload := framework.NewWorkLoad(jobReplicas, workloadMemRequest, workloadJobName, autoscalingTestLabel, targetedNodeLabel, "")
+			uniqueJobName := fmt.Sprintf("%s-scale-to-maxnodestotal", workloadJobName)
+			By(fmt.Sprintf("Creating scale-out workload %s: jobs: %v, memory: %s",
+				uniqueJobName, jobReplicas, workloadMemRequest.String()))
+			workload := framework.NewWorkLoad(jobReplicas, workloadMemRequest, uniqueJobName, autoscalingTestLabel, targetedNodeLabel, "")
cleanupObjects[workload.GetName()] = workload
Expect(client.Create(ctx, workload)).Should(Succeed())

// At this point the autoscaler should be growing the cluster, we
// wait until the cluster has grown to reach MaxNodesTotal size.
+			// Because the autoscaler will ignore nodes that are not ready or unschedulable,
+			// we need to check against the number of ready nodes in the cluster since
+			// previous tests might have left nodes that are not ready or unschedulable.
By(fmt.Sprintf("Waiting for cluster to scale up to %d nodes", caMaxNodesTotal))
Eventually(func() (bool, error) {
-				nodes, err := framework.GetNodes(client)
+				nodes, err := framework.GetReadyAndSchedulableNodes(client)
return len(nodes) == caMaxNodesTotal, err
}, framework.WaitLong, pollingInterval).Should(BeTrue())

@@ -528,9 +534,12 @@

// Now that the cluster has reached maximum size, we want to ensure
// that it doesn't try to grow larger.
+			// Because the autoscaler will ignore nodes that are not ready or unschedulable,
+			// we need to check against the number of ready nodes in the cluster since
+			// previous tests might have left nodes that are not ready or unschedulable.
By("Watching Cluster node count to ensure it remains consistent")
Consistently(func() (bool, error) {
-				nodes, err := framework.GetNodes(client)
+				nodes, err := framework.GetReadyAndSchedulableNodes(client)
return len(nodes) == caMaxNodesTotal, err
}, framework.WaitShort, pollingInterval).Should(BeTrue())

@@ -599,9 +608,10 @@ var _ = Describe("[Feature:Machines] Autoscaler should", func() {
// expand its size by 2 nodes. the cluster autoscaler should
// place 1 node in each of the 2 MachineSets created.
jobReplicas := int32(4)
-			By(fmt.Sprintf("Creating scale-out workload: jobs: %v, memory: %s",
-				jobReplicas, workloadMemRequest.String()))
-			workload := framework.NewWorkLoad(jobReplicas, workloadMemRequest, workloadJobName, autoscalingTestLabel, targetedNodeLabel, "")
+			uniqueJobName := fmt.Sprintf("%s-balance-nodegroups", workloadJobName)
+			By(fmt.Sprintf("Creating scale-out workload %s: jobs: %v, memory: %s",
+				uniqueJobName, jobReplicas, workloadMemRequest.String()))
+			workload := framework.NewWorkLoad(jobReplicas, workloadMemRequest, uniqueJobName, autoscalingTestLabel, targetedNodeLabel, "")
cleanupObjects[workload.GetName()] = workload
Expect(client.Create(ctx, workload)).Should(Succeed())

13 changes: 7 additions & 6 deletions pkg/framework/framework.go
@@ -29,12 +29,13 @@ const (
RetryMedium = 5 * time.Second
// DefaultMachineSetReplicas is the default number of replicas of a machineset
// if MachineSet.Spec.Replicas field is set to nil
-	DefaultMachineSetReplicas = 0
-	MachinePhaseRunning       = "Running"
-	MachinePhaseFailed        = "Failed"
-	MachineRoleLabel          = "machine.openshift.io/cluster-api-machine-role"
-	MachineTypeLabel          = "machine.openshift.io/cluster-api-machine-type"
-	MachineAnnotationKey      = "machine.openshift.io/machine"
+	DefaultMachineSetReplicas  = 0
+	MachinePhaseRunning        = "Running"
+	MachinePhaseFailed         = "Failed"
+	MachineRoleLabel           = "machine.openshift.io/cluster-api-machine-role"
+	MachineTypeLabel           = "machine.openshift.io/cluster-api-machine-type"
+	MachineAnnotationKey       = "machine.openshift.io/machine"
+	ClusterAPIActuatorPkgTaint = "cluster-api-actuator-pkg"
)

var (
4 changes: 4 additions & 0 deletions pkg/framework/jobs.go
@@ -41,6 +41,10 @@ func NewWorkLoad(njobs int32, memoryRequest resource.Quantity, workloadJobName s
Key: "kubemark",
Operator: corev1.TolerationOpExists,
},
+				{
+					Key:    ClusterAPIActuatorPkgTaint,
+					Effect: corev1.TaintEffectPreferNoSchedule,
+				},
			},
		},
	},

Review comment (Contributor), on the TaintEffectPreferNoSchedule line:
I think we want a NoSchedule rather than prefer; prefer means we might still get random workloads on here, right? You said you had some issues with this, IIRC? Maybe you can expand on those here.

Reply (Contributor Author):
I tried NoSchedule and it did not work as I expected, which is why I backed off to PreferNoSchedule. I have a feeling something else is not properly tolerating NoSchedule and is causing the nodes to get deprovisioned. We should figure it out, but this current patch is an incremental improvement, IMO.
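Following up on that thread, here is a minimal, self-contained sketch of how the taint added in machinesets.go pairs with the toleration added here. The names mirror the diff; the notes on the two taint effects are general Kubernetes scheduling semantics, not something this PR changes.

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

const clusterAPIActuatorPkgTaint = "cluster-api-actuator-pkg"

func main() {
	// Soft taint placed on test nodes via the MachineSet template:
	// PreferNoSchedule means the scheduler avoids placing non-tolerating
	// pods here but may still do so under pressure. NoSchedule (the
	// reviewer's suggestion) is a hard rule: pods without a matching
	// toleration are never scheduled onto the node.
	taint := corev1.Taint{
		Key:    clusterAPIActuatorPkgTaint,
		Effect: corev1.TaintEffectPreferNoSchedule,
	}

	// Matching toleration carried by the test workload. With no Operator
	// set it is evaluated as Equal against an empty value, which matches
	// the taint above since the taint sets no value either.
	toleration := corev1.Toleration{
		Key:    clusterAPIActuatorPkgTaint,
		Effect: corev1.TaintEffectPreferNoSchedule,
	}

	fmt.Println("workload tolerates test-node taint:", toleration.ToleratesTaint(&taint))
}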
9 changes: 9 additions & 0 deletions pkg/framework/machinesets.go
@@ -8,6 +8,7 @@ import (
. "github.com/onsi/gomega"

machinev1 "github.com/openshift/api/machine/v1beta1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
@@ -28,6 +29,7 @@ type MachineSetParams struct {
Name string
Replicas int32
Labels map[string]string
+	Taints       []corev1.Taint
ProviderSpec *machinev1.ProviderSpec
}

@@ -59,6 +61,12 @@ func BuildMachineSetParams(client runtimeclient.Client, replicas int) MachineSet
"e2e.openshift.io": uid.String(),
ClusterKey: clusterName,
},
+		Taints: []corev1.Taint{
+			corev1.Taint{
+				Key:    ClusterAPIActuatorPkgTaint,
+				Effect: corev1.TaintEffectPreferNoSchedule,
+			},
+		},
}
}

@@ -87,6 +95,7 @@ func CreateMachineSet(c client.Client, params MachineSetParams) (*machinev1.Mach
Labels: params.Labels,
},
ProviderSpec: *params.ProviderSpec,
+				Taints: params.Taints,
},
},
Replicas: pointer.Int32Ptr(params.Replicas),
34 changes: 33 additions & 1 deletion pkg/framework/nodes.go
@@ -26,7 +26,7 @@ func AddNodeCondition(c client.Client, node *corev1.Node, cond corev1.NodeCondit
return c.Status().Patch(context.Background(), nodeCopy, client.MergeFrom(node))
}

-// FilterReadyNodes fileter the list of nodes and returns the list with ready nodes
+// FilterReadyNodes filters the list of nodes and returns a list with ready nodes
func FilterReadyNodes(nodes []corev1.Node) []corev1.Node {
var readyNodes []corev1.Node
for _, n := range nodes {
@@ -37,6 +37,17 @@ func FilterReadyNodes(nodes []corev1.Node) []corev1.Node {
return readyNodes
}

+// FilterSchedulableNodes filters the list of nodes and returns a list with schedulable nodes
+func FilterSchedulableNodes(nodes []corev1.Node) []corev1.Node {
+	var schedulableNodes []corev1.Node
+	for _, n := range nodes {
+		if IsNodeSchedulable(&n) {
+			schedulableNodes = append(schedulableNodes, n)
+		}
+	}
+	return schedulableNodes
+}

// GetNodes gets a list of nodes from a running cluster
// Optionally, labels may be used to constrain listed nodes.
func GetNodes(c client.Client, selectors ...*metav1.LabelSelector) ([]corev1.Node, error) {
@@ -102,6 +113,19 @@ func GetNodeForMachine(c client.Client, m *machinev1.Machine) (*corev1.Node, err
return node, nil
}

+// GetReadyAndSchedulableNodes returns all the nodes that have the Ready condition and can schedule workloads
+func GetReadyAndSchedulableNodes(c client.Client) ([]corev1.Node, error) {
+	nodes, err := GetNodes(c)
+	if err != nil {
+		return nodes, err
+	}
+
+	nodes = FilterReadyNodes(nodes)
+	nodes = FilterSchedulableNodes(nodes)
+
+	return nodes, nil
+}
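A brief usage sketch, mirroring the Eventually poll from the autoscaler test diff above; the client, caMaxNodesTotal, and pollingInterval names come from that test's context rather than this file.

	// Count only nodes that are both Ready and uncordoned, so not-ready or
	// unschedulable stragglers from earlier tests do not skew the total.
	Eventually(func() (bool, error) {
		nodes, err := framework.GetReadyAndSchedulableNodes(client)
		return len(nodes) == caMaxNodesTotal, err
	}, framework.WaitLong, pollingInterval).Should(BeTrue())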

// GetWorkerNodes returns all nodes with the nodeWorkerRoleLabel label
func GetWorkerNodes(c client.Client) ([]corev1.Node, error) {
workerNodes := &corev1.NodeList{}
@@ -127,6 +151,14 @@ func IsNodeReady(node *corev1.Node) bool {
return false
}

+// IsNodeSchedulable returns true if the given node can schedule workloads.
+func IsNodeSchedulable(node *corev1.Node) bool {
+	if node.Spec.Unschedulable {
+		return false
+	}
+	return true
+}

// NodesAreReady returns true if an array of nodes are all ready
func NodesAreReady(nodes []*corev1.Node) bool {
// All nodes need to be ready