diff --git a/pkg/controllers/disruption/controller.go b/pkg/controllers/disruption/controller.go
index 519a3e29c7..990e64caed 100644
--- a/pkg/controllers/disruption/controller.go
+++ b/pkg/controllers/disruption/controller.go
@@ -83,10 +83,10 @@ func NewController(clk clock.Clock, kubeClient client.Client, provisioner *provi
 		cloudProvider: cp,
 		lastRun:       map[string]time.Time{},
 		methods: []Method{
-			// Terminate any NodeClaims that have drifted from provisioning specifications, allowing the pods to reschedule.
-			NewDrift(kubeClient, cluster, provisioner, recorder),
 			// Delete any empty NodeClaims as there is zero cost in terms of disruption.
 			NewEmptiness(c),
+			// Terminate any NodeClaims that have drifted from provisioning specifications, allowing the pods to reschedule.
+			NewDrift(kubeClient, cluster, provisioner, recorder),
 			// Attempt to identify multiple NodeClaims that we can consolidate simultaneously to reduce pod churn
 			NewMultiNodeConsolidation(c),
 			// And finally fall back our single NodeClaim consolidation to further reduce cluster cost.
diff --git a/pkg/controllers/disruption/drift.go b/pkg/controllers/disruption/drift.go
index 7c4bbdfc20..c4aed2b8e2 100644
--- a/pkg/controllers/disruption/drift.go
+++ b/pkg/controllers/disruption/drift.go
@@ -62,29 +62,14 @@ func (d *Drift) ComputeCommand(ctx context.Context, disruptionBudgetMapping map[
 			candidates[j].NodeClaim.StatusConditions().Get(string(d.Reason())).LastTransitionTime.Time)
 	})
 
-	// Do a quick check through the candidates to see if they're empty.
-	// For each candidate that is empty with a nodePool allowing its disruption
-	// add it to the existing command.
-	empty := make([]*Candidate, 0, len(candidates))
 	for _, candidate := range candidates {
-		if len(candidate.reschedulablePods) > 0 {
+		// Filter out empty candidates. If there was an empty node that wasn't consolidated before this, we should
+		// assume that it was due to budgets. If we don't filter out budgets, users who set a budget for `empty`
+		// can find their nodes disrupted here, which while that in itself isn't an issue for empty nodes, it could
+		// constrain the `drift` budget.
+		if len(candidate.reschedulablePods) == 0 {
 			continue
 		}
-		// If there's disruptions allowed for the candidate's nodepool,
-		// add it to the list of candidates, and decrement the budget.
-		if disruptionBudgetMapping[candidate.NodePool.Name] > 0 {
-			empty = append(empty, candidate)
-			disruptionBudgetMapping[candidate.NodePool.Name]--
-		}
-	}
-	// Disrupt all empty drifted candidates, as they require no scheduling simulations.
-	if len(empty) > 0 {
-		return Command{
-			candidates: empty,
-		}, scheduling.Results{}, nil
-	}
-
-	for _, candidate := range candidates {
 		// If the disruption budget doesn't allow this candidate to be disrupted,
 		// continue to the next candidate. We don't need to decrement any budget
 		// counter since drift commands can only have one candidate.
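For readers skimming the patch, the sketch below is a minimal, standalone illustration of the ordering the new drift.go comment describes: skip empty candidates so the emptiness method (which now runs first) handles them under its own budget, then honor each NodePool's remaining drift budget. The `candidate` struct and `pickDriftCandidate` helper are simplified stand-ins invented for illustration; they are not part of Karpenter's API or of this change.

```go
// Standalone sketch (not part of this patch) of "skip empty candidates,
// then honor per-NodePool disruption budgets". Names are hypothetical.
package main

import "fmt"

type candidate struct {
	name              string
	nodePool          string
	reschedulablePods int
}

// pickDriftCandidate returns the first drifted candidate that is not empty and
// whose NodePool still has budget left. Empty nodes are skipped so that the
// emptiness method disrupts them under its own budget instead of consuming the
// drift budget.
func pickDriftCandidate(candidates []candidate, budget map[string]int) (candidate, bool) {
	for _, c := range candidates {
		if c.reschedulablePods == 0 {
			continue // leave empty nodes to the emptiness disruption method
		}
		if budget[c.nodePool] <= 0 {
			continue // this NodePool's drift budget is exhausted
		}
		return c, true
	}
	return candidate{}, false
}

func main() {
	cs := []candidate{
		{name: "node-a", nodePool: "default", reschedulablePods: 0}, // empty: skipped
		{name: "node-b", nodePool: "default", reschedulablePods: 2},
	}
	if c, ok := pickDriftCandidate(cs, map[string]int{"default": 1}); ok {
		fmt.Println("disrupting", c.name)
	}
}
```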
diff --git a/pkg/controllers/disruption/drift_test.go b/pkg/controllers/disruption/drift_test.go
index 7209f09d2f..1a98f567ab 100644
--- a/pkg/controllers/disruption/drift_test.go
+++ b/pkg/controllers/disruption/drift_test.go
@@ -122,8 +122,8 @@ var _ = Describe("Drift", func() {
 			ExpectApplied(ctx, env.Client, rs)
 			Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(rs), rs)).To(Succeed())
 		})
-		It("should allow all empty nodes to be disrupted", func() {
-			nodeClaims, nodes = test.NodeClaimsAndNodes(numNodes, v1.NodeClaim{
+		It("should not consider empty nodes for drift budgets", func() {
+			nodeClaims, nodes = test.NodeClaimsAndNodes(2, v1.NodeClaim{
 				ObjectMeta: metav1.ObjectMeta{
 					Labels: map[string]string{
 						v1.NodePoolLabelKey: nodePool.Name,
@@ -140,67 +140,59 @@ var _ = Describe("Drift", func() {
 				},
 			})
-			nodePool.Spec.Disruption.Budgets = []v1.Budget{{Nodes: "100%"}}
+			nodePool.Spec.Disruption.Budgets = []v1.Budget{
+				{Reasons: []v1.DisruptionReason{v1.DisruptionReasonDrifted}, Nodes: "1"},
+			}
 			ExpectApplied(ctx, env.Client, nodePool)
-			for i := 0; i < numNodes; i++ {
+			for i := 0; i < 2; i++ {
 				nodeClaims[i].StatusConditions().SetTrue(v1.ConditionTypeDrifted)
 				ExpectApplied(ctx, env.Client, nodeClaims[i], nodes[i])
 			}
-			// inform cluster state about nodes and nodeclaims
-			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, nodes, nodeClaims)
-			ExpectSingletonReconciled(ctx, disruptionController)
-
-			metric, found := FindMetricWithLabelValues("karpenter_nodepools_allowed_disruptions", map[string]string{
-				"nodepool": nodePool.Name,
-			})
-			Expect(found).To(BeTrue())
-			Expect(metric.GetGauge().GetValue()).To(BeNumerically("==", 10))
+			pod := test.Pod(test.PodOptions{
+				ObjectMeta: metav1.ObjectMeta{Labels: labels,
+					OwnerReferences: []metav1.OwnerReference{
+						{
+							APIVersion:         "apps/v1",
+							Kind:               "ReplicaSet",
+							Name:               rs.Name,
+							UID:                rs.UID,
+							Controller:         lo.ToPtr(true),
+							BlockOwnerDeletion: lo.ToPtr(true),
+						},
+					}}})
-			// Execute command, thus deleting all nodes
-			ExpectSingletonReconciled(ctx, queue)
-			Expect(len(ExpectNodeClaims(ctx, env.Client))).To(Equal(0))
-		})
-		It("should allow no empty nodes to be disrupted", func() {
-			nodeClaims, nodes = test.NodeClaimsAndNodes(numNodes, v1.NodeClaim{
-				ObjectMeta: metav1.ObjectMeta{
-					Labels: map[string]string{
-						v1.NodePoolLabelKey: nodePool.Name,
-						corev1.LabelInstanceTypeStable: mostExpensiveInstance.Name,
-						v1.CapacityTypeLabelKey: mostExpensiveOffering.Requirements.Get(v1.CapacityTypeLabelKey).Any(),
-						corev1.LabelTopologyZone: mostExpensiveOffering.Requirements.Get(corev1.LabelTopologyZone).Any(),
-					},
-				},
-				Status: v1.NodeClaimStatus{
-					Allocatable: map[corev1.ResourceName]resource.Quantity{
-						corev1.ResourceCPU: resource.MustParse("32"),
-						corev1.ResourcePods: resource.MustParse("100"),
-					},
-				},
-			})
+			emptyNodeClaim, driftedNodeClaim := nodeClaims[0], nodeClaims[1]
+			driftedNode := nodes[1]
+			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, nodes, nodeClaims)
+			ExpectApplied(ctx, env.Client, rs, pod, nodePool)
-			nodePool.Spec.Disruption.Budgets = []v1.Budget{{Nodes: "0%"}}
+			// bind pod to the node eligible for disruption
+			ExpectManualBinding(ctx, env.Client, pod, driftedNode)
-			ExpectApplied(ctx, env.Client, nodePool)
-			for i := 0; i < numNodes; i++ {
-				nodeClaims[i].StatusConditions().SetTrue(v1.ConditionTypeDrifted)
-				ExpectApplied(ctx, env.Client, nodeClaims[i], nodes[i])
-			}
 			// inform cluster state about nodes and nodeclaims
-			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, nodes, nodeClaims)
 			ExpectSingletonReconciled(ctx, disruptionController)
-			metric, found := FindMetricWithLabelValues("karpenter_nodepools_allowed_disruptions", map[string]string{
-				"nodepool": nodePool.Name,
+			// only allow a single node to be disrupted because of budgets
+			ExpectMetricGaugeValue(disruption.NodePoolAllowedDisruptions, 1, map[string]string{
+				metrics.NodePoolLabel: nodePool.Name,
+				metrics.ReasonLabel: string(v1.DisruptionReasonDrifted),
 			})
-			Expect(found).To(BeTrue())
-			Expect(metric.GetGauge().GetValue()).To(BeNumerically("==", 0))
-			// Execute command, thus deleting no nodes
+			// Execute command, deleting one node
 			ExpectSingletonReconciled(ctx, queue)
-			Expect(len(ExpectNodeClaims(ctx, env.Client))).To(Equal(numNodes))
+			// Cascade any deletion of the nodeClaim to the node
+			ExpectNodeClaimsCascadeDeletion(ctx, env.Client, driftedNodeClaim)
+
+			ExpectNotFound(ctx, env.Client, nodeClaim, driftedNode)
+			ncs := ExpectNodeClaims(ctx, env.Client)
+			Expect(len(ncs)).To(Equal(1))
+			Expect(ncs[0].Name).To(Equal(emptyNodeClaim.Name))
+			ExpectMetricCounterValue(disruption.DecisionsPerformedTotal, 1, map[string]string{
+				metrics.ReasonLabel: "drifted",
+			})
 		})
-		It("should only allow 3 empty nodes to be disrupted", func() {
+		It("should only consider 3 nodes allowed to be disrupted because of budgets", func() {
 			nodeClaims, nodes = test.NodeClaimsAndNodes(numNodes, v1.NodeClaim{
 				ObjectMeta: metav1.ObjectMeta{
 					Labels: map[string]string{
@@ -228,15 +220,11 @@ var _ = Describe("Drift", func() {
 			// inform cluster state about nodes and nodeclaims
 			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, nodes, nodeClaims)
 			ExpectSingletonReconciled(ctx, disruptionController)
 
-			metric, found := FindMetricWithLabelValues("karpenter_nodepools_allowed_disruptions", map[string]string{
-				"nodepool": nodePool.Name,
-			})
-			Expect(found).To(BeTrue())
-			Expect(metric.GetGauge().GetValue()).To(BeNumerically("==", 3))
-			// Execute command, thus deleting 3 nodes
-			ExpectSingletonReconciled(ctx, queue)
-			Expect(len(ExpectNodeClaims(ctx, env.Client))).To(Equal(7))
+			ExpectMetricGaugeValue(disruption.NodePoolAllowedDisruptions, 3, map[string]string{
+				metrics.NodePoolLabel: nodePool.Name,
+				metrics.ReasonLabel: string(v1.DisruptionReasonDrifted),
+			})
 		})
 		It("should disrupt 3 nodes, taking into account commands in progress", func() {
 			nodeClaims, nodes = test.NodeClaimsAndNodes(numNodes, v1.NodeClaim{
@@ -310,15 +298,14 @@ var _ = Describe("Drift", func() {
 			}
 			Expect(len(ExpectNodeClaims(ctx, env.Client))).To(Equal(7))
 		})
-		It("should allow 2 nodes from each nodePool to be deleted", func() {
+		It("should respect budgets for multiple nodepools", func() {
 			// Create 10 nodepools
 			nps := test.NodePools(10, v1.NodePool{
 				Spec: v1.NodePoolSpec{
 					Disruption: v1.Disruption{
 						ConsolidateAfter: v1.MustParseNillableDuration("Never"),
 						Budgets: []v1.Budget{{
-							// 1/2 of 3 nodes == 1.5 nodes. This should round up to 2.
-							Nodes: "50%",
+							Nodes: "1",
 						}},
 					},
 					Template: v1.NodeClaimTemplate{
@@ -334,7 +321,7 @@ var _ = Describe("Drift", func() {
 			}
 			nodeClaims = make([]*v1.NodeClaim, 0, 30)
 			nodes = make([]*corev1.Node, 0, 30)
-			// Create 3 nodes for each nodePool
+			// Create 3 nodes for each nodePool, 2 of which have a pod
 			for _, np := range nps {
 				ncs, ns := test.NodeClaimsAndNodes(3, v1.NodeClaim{
 					ObjectMeta: metav1.ObjectMeta{
@@ -352,91 +339,59 @@ var _ = Describe("Drift", func() {
 						},
 					},
 				})
+				for i := range ncs {
+					ncs[i].StatusConditions().SetTrue(v1.ConditionTypeDrifted)
+					ExpectApplied(ctx, env.Client, ncs[i], ns[i])
+				}
 				nodeClaims = append(nodeClaims, ncs...)
 				nodes = append(nodes, ns...)
-			}
-			ExpectApplied(ctx, env.Client, nodePool)
-			for i := 0; i < len(nodeClaims); i++ {
-				nodeClaims[i].StatusConditions().SetTrue(v1.ConditionTypeDrifted)
-				ExpectApplied(ctx, env.Client, nodeClaims[i], nodes[i])
-			}
-
-			// inform cluster state about nodes and nodeclaims
-			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, nodes, nodeClaims)
-			ExpectSingletonReconciled(ctx, disruptionController)
-
-			for _, np := range nps {
-				metric, found := FindMetricWithLabelValues("karpenter_nodepools_allowed_disruptions", map[string]string{
-					"nodepool": np.Name,
-				})
-				Expect(found).To(BeTrue())
-				Expect(metric.GetGauge().GetValue()).To(BeNumerically("==", 2))
-			}
-
-			// Execute the command in the queue, only deleting 20 nodes
-			ExpectSingletonReconciled(ctx, queue)
-			Expect(len(ExpectNodeClaims(ctx, env.Client))).To(Equal(10))
-		})
-		It("should allow all nodes from each nodePool to be deleted", func() {
-			// Create 10 nodepools
-			nps := test.NodePools(10, v1.NodePool{
-				Spec: v1.NodePoolSpec{
-					Disruption: v1.Disruption{
-						ConsolidateAfter: v1.MustParseNillableDuration("Never"),
-						Budgets: []v1.Budget{{
-							Nodes: "100%",
-						}},
-					},
-				},
-			})
-			ExpectApplied(ctx, env.Client, nodePool)
-			for i := 0; i < len(nps); i++ {
-				ExpectApplied(ctx, env.Client, nps[i])
-			}
-			nodeClaims = make([]*v1.NodeClaim, 0, 30)
-			nodes = make([]*corev1.Node, 0, 30)
-			// Create 3 nodes for each nodePool
-			for _, np := range nps {
-				ncs, ns := test.NodeClaimsAndNodes(3, v1.NodeClaim{
-					ObjectMeta: metav1.ObjectMeta{
-						Labels: map[string]string{
-							v1.NodePoolLabelKey: np.Name,
-							corev1.LabelInstanceTypeStable: mostExpensiveInstance.Name,
-							v1.CapacityTypeLabelKey: mostExpensiveOffering.Requirements.Get(v1.CapacityTypeLabelKey).Any(),
-							corev1.LabelTopologyZone: mostExpensiveOffering.Requirements.Get(corev1.LabelTopologyZone).Any(),
+				pods := test.Pods(2, test.PodOptions{
+					ResourceRequirements: corev1.ResourceRequirements{
+						Requests: map[corev1.ResourceName]resource.Quantity{
+							corev1.ResourceCPU: resource.MustParse("1"),
 						},
 					},
-					Status: v1.NodeClaimStatus{
-						Allocatable: map[corev1.ResourceName]resource.Quantity{
-							corev1.ResourceCPU: resource.MustParse("32"),
-							corev1.ResourcePods: resource.MustParse("100"),
-						},
-					},
-				})
-				nodeClaims = append(nodeClaims, ncs...)
-				nodes = append(nodes, ns...)
-			}
-			ExpectApplied(ctx, env.Client, nodePool)
-			for i := 0; i < len(nodeClaims); i++ {
-				nodeClaims[i].StatusConditions().SetTrue(v1.ConditionTypeDrifted)
-				ExpectApplied(ctx, env.Client, nodeClaims[i], nodes[i])
+					ObjectMeta: metav1.ObjectMeta{Labels: labels,
+						OwnerReferences: []metav1.OwnerReference{
+							{
+								APIVersion:         "apps/v1",
+								Kind:               "ReplicaSet",
+								Name:               rs.Name,
+								UID:                rs.UID,
+								Controller:         lo.ToPtr(true),
+								BlockOwnerDeletion: lo.ToPtr(true),
+							},
+						}}})
+				// Bind the pods to the first 2 nodes for each nodepool.
+				for i := range pods {
+					ExpectApplied(ctx, env.Client, pods[i])
+					ExpectManualBinding(ctx, env.Client, pods[i], ns[i])
+				}
 			}
 			// inform cluster state about nodes and nodeclaims
 			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, nodes, nodeClaims)
-			ExpectSingletonReconciled(ctx, disruptionController)
+			// Reconcile 30 times, enqueuing 10 commands total
+			for range 30 {
+				ExpectSingletonReconciled(ctx, disruptionController)
+			}
 			for _, np := range nps {
-				metric, found := FindMetricWithLabelValues("karpenter_nodepools_allowed_disruptions", map[string]string{
-					"nodepool": np.Name,
+				ExpectMetricGaugeValue(disruption.NodePoolAllowedDisruptions, 1, map[string]string{
+					metrics.NodePoolLabel: np.Name,
+					metrics.ReasonLabel: string(v1.DisruptionReasonDrifted),
 				})
-				Expect(found).To(BeTrue())
-				Expect(metric.GetGauge().GetValue()).To(BeNumerically("==", 3))
 			}
-			// Execute the command in the queue, deleting all nodes
-			ExpectSingletonReconciled(ctx, queue)
-			Expect(len(ExpectNodeClaims(ctx, env.Client))).To(Equal(0))
+			// Delete 10 nodes, 1 node per nodepool, according to their budgets
+			for range 30 {
+				ExpectSingletonReconciled(ctx, queue)
+			}
+			Expect(len(ExpectNodeClaims(ctx, env.Client))).To(Equal(20))
+			// These nodes will disrupt because of Drift instead of Emptiness because they are not marked consolidatable
+			ExpectMetricCounterValue(disruption.DecisionsPerformedTotal, 10, map[string]string{
+				metrics.ReasonLabel: "drifted",
+			})
 		})
 	})
 	Context("Drift", func() {
@@ -518,6 +473,21 @@ var _ = Describe("Drift", func() {
 			Expect(ExpectNodeClaims(ctx, env.Client)).To(HaveLen(1))
 			ExpectExists(ctx, env.Client, nodeClaim)
 		})
+		It("should ignore nodes with the drifted status condition set to false", func() {
+			nodeClaim.StatusConditions().SetFalse(v1.ConditionTypeDrifted, "NotDrifted", "NotDrifted")
+			ExpectApplied(ctx, env.Client, nodeClaim, node, nodePool)
+
+			// inform cluster state about nodes and nodeclaims
+			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*corev1.Node{node}, []*v1.NodeClaim{nodeClaim})
+
+			fakeClock.Step(10 * time.Minute)
+
+			ExpectSingletonReconciled(ctx, disruptionController)
+
+			// Expect to not create or delete more nodeclaims
+			Expect(ExpectNodeClaims(ctx, env.Client)).To(HaveLen(1))
+			ExpectExists(ctx, env.Client, nodeClaim)
+		})
 		It("should ignore nodes with the karpenter.sh/do-not-disrupt annotation", func() {
 			node.Annotations = lo.Assign(node.Annotations, map[string]string{v1.DoNotDisruptAnnotationKey: "true"})
 			ExpectApplied(ctx, env.Client, nodeClaim, node, nodePool)
@@ -532,6 +502,64 @@ var _ = Describe("Drift", func() {
 			Expect(ExpectNodes(ctx, env.Client)).To(HaveLen(1))
 			ExpectExists(ctx, env.Client, nodeClaim)
 		})
+		It("should delete drifted nodes with the karpenter.sh/do-not-disrupt annotation set to false", func() {
+			node.Annotations = lo.Assign(node.Annotations, map[string]string{v1.DoNotDisruptAnnotationKey: "false"})
+			labels := map[string]string{
+				"app": "test",
+			}
+			// create replicaset
+			rs := test.ReplicaSet()
+			ExpectApplied(ctx, env.Client, rs)
+			Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(rs), rs)).To(Succeed())
+
+			pod := test.Pod(test.PodOptions{
+				ObjectMeta: metav1.ObjectMeta{Labels: labels,
+					OwnerReferences: []metav1.OwnerReference{
+						{
+							APIVersion:         "apps/v1",
+							Kind:               "ReplicaSet",
+							Name:               rs.Name,
+							UID:                rs.UID,
+							Controller:         lo.ToPtr(true),
+							BlockOwnerDeletion: lo.ToPtr(true),
+						},
+					}}})
+			nodeClaim2, node2 := test.NodeClaimAndNode(v1.NodeClaim{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{
+						v1.NodePoolLabelKey: nodePool.Name,
+						corev1.LabelInstanceTypeStable: mostExpensiveInstance.Name,
+						v1.CapacityTypeLabelKey: mostExpensiveOffering.Requirements.Get(v1.CapacityTypeLabelKey).Any(),
+						corev1.LabelTopologyZone: mostExpensiveOffering.Requirements.Get(corev1.LabelTopologyZone).Any(),
+					},
+				},
+				Status: v1.NodeClaimStatus{
+					ProviderID: test.RandomProviderID(),
+					Allocatable: map[corev1.ResourceName]resource.Quantity{
+						corev1.ResourceCPU: resource.MustParse("32"),
+						corev1.ResourcePods: resource.MustParse("100"),
+					},
+				},
+			})
+
+			ExpectApplied(ctx, env.Client, rs, pod, nodeClaim, nodeClaim2, node, node2, nodePool)
+			ExpectManualBinding(ctx, env.Client, pod, node)
+
+			// inform cluster state about nodes and nodeclaims
+			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*corev1.Node{node, node2}, []*v1.NodeClaim{nodeClaim, nodeClaim2})
+
+			// Process candidates
+			ExpectSingletonReconciled(ctx, disruptionController)
+			// Process the eligible candidate so that the node can be deleted.
+			ExpectSingletonReconciled(ctx, queue)
+			// Cascade any deletion of the nodeClaim to the node
+			ExpectNodeClaimsCascadeDeletion(ctx, env.Client, nodeClaim)
+
+			// We should delete the nodeClaim that has drifted
+			Expect(ExpectNodeClaims(ctx, env.Client)).To(HaveLen(1))
+			Expect(ExpectNodes(ctx, env.Client)).To(HaveLen(1))
+			ExpectNotFound(ctx, env.Client, nodeClaim, node)
+		})
 		It("should not create replacements for drifted nodes that have pods with the karpenter.sh/do-not-disrupt annotation", func() {
 			pod := test.Pod(test.PodOptions{
 				ObjectMeta: metav1.ObjectMeta{
@@ -601,79 +629,7 @@ var _ = Describe("Drift", func() {
 			Expect(ExpectNodes(ctx, env.Client)).To(HaveLen(1))
 			ExpectExists(ctx, env.Client, nodeClaim)
 		})
-		It("should ignore nodes with the drifted status condition set to false", func() {
-			nodeClaim.StatusConditions().SetFalse(v1.ConditionTypeDrifted, "NotDrifted", "NotDrifted")
-			ExpectApplied(ctx, env.Client, nodeClaim, node, nodePool)
-
-			// inform cluster state about nodes and nodeclaims
-			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*corev1.Node{node}, []*v1.NodeClaim{nodeClaim})
-
-			fakeClock.Step(10 * time.Minute)
-
-			ExpectSingletonReconciled(ctx, disruptionController)
-
-			// Expect to not create or delete more nodeclaims
-			Expect(ExpectNodeClaims(ctx, env.Client)).To(HaveLen(1))
-			ExpectExists(ctx, env.Client, nodeClaim)
-		})
-		It("can delete drifted nodes", func() {
-			ExpectApplied(ctx, env.Client, nodeClaim, node, nodePool)
-
-			// inform cluster state about nodes and nodeclaims
-			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*corev1.Node{node}, []*v1.NodeClaim{nodeClaim})
-
-			fakeClock.Step(10 * time.Minute)
-			ExpectSingletonReconciled(ctx, disruptionController)
-			// Process the item so that the nodes can be deleted.
-			ExpectSingletonReconciled(ctx, queue)
-			// Cascade any deletion of the nodeClaim to the node
-			ExpectNodeClaimsCascadeDeletion(ctx, env.Client, nodeClaim)
-
-			// We should delete the nodeClaim that has drifted
-			Expect(ExpectNodeClaims(ctx, env.Client)).To(HaveLen(0))
-			Expect(ExpectNodes(ctx, env.Client)).To(HaveLen(0))
-			ExpectNotFound(ctx, env.Client, nodeClaim, node)
-		})
-		It("should disrupt all empty drifted nodes in parallel", func() {
-			nodeClaims, nodes := test.NodeClaimsAndNodes(100, v1.NodeClaim{
-				ObjectMeta: metav1.ObjectMeta{
-					Labels: map[string]string{
-						v1.NodePoolLabelKey: nodePool.Name,
-						corev1.LabelInstanceTypeStable: mostExpensiveInstance.Name,
-						v1.CapacityTypeLabelKey: mostExpensiveOffering.Requirements.Get(v1.CapacityTypeLabelKey).Any(),
-						corev1.LabelTopologyZone: mostExpensiveOffering.Requirements.Get(corev1.LabelTopologyZone).Any(),
-					},
-				},
-				Status: v1.NodeClaimStatus{
-					Allocatable: map[corev1.ResourceName]resource.Quantity{
-						corev1.ResourceCPU: resource.MustParse("32"),
-						corev1.ResourcePods: resource.MustParse("100"),
-					},
-				},
-			})
-			for _, m := range nodeClaims {
-				m.StatusConditions().SetTrue(v1.ConditionTypeDrifted)
-				ExpectApplied(ctx, env.Client, m)
-			}
-			for _, n := range nodes {
-				ExpectApplied(ctx, env.Client, n)
-			}
-			ExpectApplied(ctx, env.Client, nodePool)
-
-			// inform cluster state about nodes and nodeClaims
-			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, nodes, nodeClaims)
-			ExpectSingletonReconciled(ctx, disruptionController)
-
-			// Process the item so that the nodes can be deleted.
-			ExpectSingletonReconciled(ctx, queue)
-			// Cascade any deletion of the nodeClaim to the node
-			ExpectNodeClaimsCascadeDeletion(ctx, env.Client, nodeClaims...)
-
-			// Expect that the drifted nodeClaims are gone
-			Expect(ExpectNodes(ctx, env.Client)).To(HaveLen(0))
-			Expect(ExpectNodeClaims(ctx, env.Client)).To(HaveLen(0))
-		})
-		It("can replace drifted nodes", func() {
+		It("should replace drifted nodes", func() {
 			labels := map[string]string{
 				"app": "test",
 			}
@@ -726,6 +682,63 @@ var _ = Describe("Drift", func() {
 			Expect(nodeclaims[0].Name).ToNot(Equal(nodeClaim.Name))
 			Expect(nodes[0].Name).ToNot(Equal(node.Name))
 		})
+		It("should delete drifted nodes", func() {
+			labels := map[string]string{
+				"app": "test",
+			}
+			// create replicaset
+			rs := test.ReplicaSet()
+			ExpectApplied(ctx, env.Client, rs)
+			Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(rs), rs)).To(Succeed())
+
+			pod := test.Pod(test.PodOptions{
+				ObjectMeta: metav1.ObjectMeta{Labels: labels,
+					OwnerReferences: []metav1.OwnerReference{
+						{
+							APIVersion:         "apps/v1",
+							Kind:               "ReplicaSet",
+							Name:               rs.Name,
+							UID:                rs.UID,
+							Controller:         lo.ToPtr(true),
+							BlockOwnerDeletion: lo.ToPtr(true),
+						},
+					}}})
+			nodeClaim2, node2 := test.NodeClaimAndNode(v1.NodeClaim{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{
+						v1.NodePoolLabelKey: nodePool.Name,
+						corev1.LabelInstanceTypeStable: mostExpensiveInstance.Name,
+						v1.CapacityTypeLabelKey: mostExpensiveOffering.Requirements.Get(v1.CapacityTypeLabelKey).Any(),
+						corev1.LabelTopologyZone: mostExpensiveOffering.Requirements.Get(corev1.LabelTopologyZone).Any(),
+					},
+				},
+				Status: v1.NodeClaimStatus{
+					ProviderID: test.RandomProviderID(),
+					Allocatable: map[corev1.ResourceName]resource.Quantity{
+						corev1.ResourceCPU: resource.MustParse("32"),
+						corev1.ResourcePods: resource.MustParse("100"),
+					},
+				},
+			})
+
+			ExpectApplied(ctx, env.Client, rs, pod, nodeClaim, nodeClaim2, node, node2, nodePool)
+			ExpectManualBinding(ctx, env.Client, pod, node)
+
+			// inform cluster state about nodes and nodeclaims
+			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*corev1.Node{node, node2}, []*v1.NodeClaim{nodeClaim, nodeClaim2})
+
+			// Process candidates
+			ExpectSingletonReconciled(ctx, disruptionController)
+			// Process the eligible candidate so that the node can be deleted.
+			ExpectSingletonReconciled(ctx, queue)
+			// Cascade any deletion of the nodeClaim to the node
+			ExpectNodeClaimsCascadeDeletion(ctx, env.Client, nodeClaim)
+
+			// We should delete the nodeClaim that has drifted
+			Expect(ExpectNodeClaims(ctx, env.Client)).To(HaveLen(1))
+			Expect(ExpectNodes(ctx, env.Client)).To(HaveLen(1))
+			ExpectNotFound(ctx, env.Client, nodeClaim, node)
+		})
 		It("should untaint nodes when drift replacement fails", func() {
 			cloudProvider.AllowedCreateCalls = 0 // fail the replacement and expect it to untaint
@@ -941,24 +954,5 @@ var _ = Describe("Drift", func() {
 			ExpectExists(ctx, env.Client, nodeClaim)
 			ExpectExists(ctx, env.Client, node)
 		})
-		It("should delete nodes with the karpenter.sh/do-not-disrupt annotation set to false", func() {
-			node.Annotations = lo.Assign(node.Annotations, map[string]string{v1.DoNotDisruptAnnotationKey: "false"})
-			ExpectApplied(ctx, env.Client, nodeClaim, node, nodePool)
-
-			// inform cluster state about nodes and nodeclaims
-			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*corev1.Node{node}, []*v1.NodeClaim{nodeClaim})
-
-			fakeClock.Step(10 * time.Minute)
-			ExpectSingletonReconciled(ctx, disruptionController)
-			// Process the item so that the nodes can be deleted.
-			ExpectSingletonReconciled(ctx, queue)
-			// Cascade any deletion of the nodeClaim to the node
-			ExpectNodeClaimsCascadeDeletion(ctx, env.Client, nodeClaim)
-
-			// We should delete the nodeClaim that has drifted
-			Expect(ExpectNodeClaims(ctx, env.Client)).To(HaveLen(0))
-			Expect(ExpectNodes(ctx, env.Client)).To(HaveLen(0))
-			ExpectNotFound(ctx, env.Client, nodeClaim, node)
-		})
 	})
 })
diff --git a/pkg/controllers/disruption/emptiness_test.go b/pkg/controllers/disruption/emptiness_test.go
index 8d06380f84..a8e4cea79e 100644
--- a/pkg/controllers/disruption/emptiness_test.go
+++ b/pkg/controllers/disruption/emptiness_test.go
@@ -411,6 +411,31 @@ var _ = Describe("Emptiness", func() {
 			Expect(ExpectNodes(ctx, env.Client)).To(HaveLen(0))
 			ExpectNotFound(ctx, env.Client, nodeClaim, node)
 		})
+		It("can delete empty and drifted nodes", func() {
+			nodeClaim.StatusConditions().SetTrue(v1.ConditionTypeDrifted)
+			ExpectApplied(ctx, env.Client, nodePool, nodeClaim, node)
+
+			// inform cluster state about nodes and nodeclaims
+			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*corev1.Node{node}, []*v1.NodeClaim{nodeClaim})
+
+			fakeClock.Step(10 * time.Minute)
+			wg := sync.WaitGroup{}
+			ExpectToWait(fakeClock, &wg)
+			ExpectSingletonReconciled(ctx, disruptionController)
+			wg.Wait()
+
+			ExpectSingletonReconciled(ctx, queue)
+			// Cascade any deletion of the nodeClaim to the node
+			ExpectNodeClaimsCascadeDeletion(ctx, env.Client, nodeClaim)
+
+			// we should delete the empty node
+			Expect(ExpectNodeClaims(ctx, env.Client)).To(HaveLen(0))
+			Expect(ExpectNodes(ctx, env.Client)).To(HaveLen(0))
+			ExpectNotFound(ctx, env.Client, nodeClaim, node)
+			ExpectMetricGaugeValue(disruption.EligibleNodes, 1, map[string]string{
+				metrics.ReasonLabel: "empty",
+			})
+		})
 		It("should ignore nodes without the consolidatable status condition", func() {
 			_ = nodeClaim.StatusConditions().Clear(v1.ConditionTypeConsolidatable)
 			ExpectApplied(ctx, env.Client, nodeClaim, node, nodePool)
@@ -439,6 +464,28 @@ var _ = Describe("Emptiness", func() {
 			Expect(ExpectNodes(ctx, env.Client)).To(HaveLen(1))
 			ExpectExists(ctx, env.Client, nodeClaim)
 		})
+		It("should delete nodes with the karpenter.sh/do-not-disrupt annotation set to false", func() {
+			node.Annotations = lo.Assign(node.Annotations, map[string]string{v1.DoNotDisruptAnnotationKey: "false"})
+			ExpectApplied(ctx, env.Client, nodePool, nodeClaim, node)
+
+			// inform cluster state about nodes and nodeclaims
+			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*corev1.Node{node}, []*v1.NodeClaim{nodeClaim})
+
+			fakeClock.Step(10 * time.Minute)
+			wg := sync.WaitGroup{}
+			ExpectToWait(fakeClock, &wg)
+			ExpectSingletonReconciled(ctx, disruptionController)
+			wg.Wait()
+
+			ExpectSingletonReconciled(ctx, queue)
+			// Cascade any deletion of the nodeClaim to the node
+			ExpectNodeClaimsCascadeDeletion(ctx, env.Client, nodeClaim)
+
+			// we should delete the empty node
+			Expect(ExpectNodeClaims(ctx, env.Client)).To(HaveLen(0))
+			Expect(ExpectNodes(ctx, env.Client)).To(HaveLen(0))
+			ExpectNotFound(ctx, env.Client, nodeClaim, node)
+		})
 		It("should ignore nodes that have pods", func() {
 			pod := test.Pod()
 			ExpectApplied(ctx, env.Client, nodeClaim, node, nodePool, pod)
diff --git a/pkg/controllers/disruption/suite_test.go b/pkg/controllers/disruption/suite_test.go
index 78aeb4a3cc..33ff2867de 100644
--- a/pkg/controllers/disruption/suite_test.go
+++ b/pkg/controllers/disruption/suite_test.go
@@ -1833,18 +1833,20 @@ var _ = Describe("Metrics", func() {
 		})
 		It("should fire metrics for single node empty disruption", func() {
 			nodeClaim, node := nodeClaims[0], nodes[0]
-			nodeClaim.StatusConditions().SetTrue(v1.ConditionTypeDrifted)
-			ExpectApplied(ctx, env.Client, nodeClaim, node, nodePool)
+			ExpectApplied(ctx, env.Client, nodePool, nodeClaim, node)
 
 			// inform cluster state about nodes and nodeclaims
 			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*corev1.Node{node}, []*v1.NodeClaim{nodeClaim})
 
 			fakeClock.Step(10 * time.Minute)
+			wg := sync.WaitGroup{}
+			ExpectToWait(fakeClock, &wg)
 			ExpectSingletonReconciled(ctx, disruptionController)
+			wg.Wait()
 
 			ExpectMetricCounterValue(disruption.DecisionsPerformedTotal, 1, map[string]string{
 				"decision": "delete",
-				metrics.ReasonLabel: "drifted",
+				metrics.ReasonLabel: "empty",
 			})
 		})
 		It("should fire metrics for single node delete disruption", func() {