Skip to content
2 changes: 1 addition & 1 deletion pkg/controllers/node/health/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ var _ = BeforeSuite(func() {
cloudProvider = fake.NewCloudProvider()
cloudProvider = fake.NewCloudProvider()
recorder = test.NewEventRecorder()
queue = terminator.NewTestingQueue(env.Client, recorder)
queue = terminator.NewQueue(env.Client, recorder)
healthController = health.NewController(env.Client, cloudProvider, fakeClock, recorder)
})

Expand Down
101 changes: 45 additions & 56 deletions pkg/controllers/node/termination/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ var _ = BeforeSuite(func() {

cloudProvider = fake.NewCloudProvider()
recorder = test.NewEventRecorder()
queue = terminator.NewTestingQueue(env.Client, recorder)
queue = terminator.NewQueue(env.Client, recorder)
terminationController = termination.NewController(fakeClock, env.Client, cloudProvider, terminator.NewTerminator(fakeClock, env.Client, queue, recorder), recorder)
})

Expand All @@ -86,7 +86,7 @@ var _ = Describe("Termination", func() {
BeforeEach(func() {
fakeClock.SetTime(time.Now())
cloudProvider.Reset()
*queue = lo.FromPtr(terminator.NewTestingQueue(env.Client, recorder))
*queue = lo.FromPtr(terminator.NewQueue(env.Client, recorder))

nodePool = test.NodePool()
nodeClaim, node = test.NodeClaimAndNode(v1.NodeClaim{ObjectMeta: metav1.ObjectMeta{Finalizers: []string{v1.TerminationFinalizer}}})
Expand Down Expand Up @@ -208,8 +208,11 @@ var _ = Describe("Termination", func() {
ExpectApplied(ctx, env.Client, node, nodeClaim, pod, pdb)
ExpectManualBinding(ctx, env.Client, pod, node)
Expect(env.Client.Delete(ctx, node)).To(Succeed())

// We only reconcile once since this label should be applied before draining the node
ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node))
ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation
_ = ExpectObjectReconcileFailed(ctx, env.Client, queue, pod)

node = ExpectNodeExists(ctx, env.Client, node.Name)
Expect(node.Labels[corev1.LabelNodeExcludeBalancers]).Should(Equal("karpenter"))
})
Expand All @@ -225,9 +228,10 @@ var _ = Describe("Termination", func() {
// Trigger Termination Controller
Expect(env.Client.Delete(ctx, node)).To(Succeed())
node = ExpectNodeExists(ctx, env.Client, node.Name)

ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation
Expect(queue.Has(node, podSkip)).To(BeFalse())
ExpectSingletonReconciled(ctx, queue)
Expect(queue.Has(podSkip)).To(BeFalse())
ExpectObjectReconciled(ctx, env.Client, queue, podEvict)

// Expect node to exist and be draining
ExpectNodeWithNodeClaimDraining(env.Client, node.Name)
Expand Down Expand Up @@ -256,9 +260,10 @@ var _ = Describe("Termination", func() {
// Trigger Termination Controller
Expect(env.Client.Delete(ctx, node)).To(Succeed())
node = ExpectNodeExists(ctx, env.Client, node.Name)

ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation
Expect(queue.Has(node, podSkip)).To(BeFalse())
ExpectSingletonReconciled(ctx, queue)
Expect(queue.Has(podSkip)).To(BeFalse())
ExpectObjectReconciled(ctx, env.Client, queue, podEvict)

// Expect node to exist and be draining
ExpectNodeWithNodeClaimDraining(env.Client, node.Name)
Expand All @@ -267,7 +272,7 @@ var _ = Describe("Termination", func() {
EventuallyExpectTerminating(ctx, env.Client, podEvict)
ExpectDeleted(ctx, env.Client, podEvict)

Expect(queue.Has(node, podSkip)).To(BeFalse())
Expect(queue.Has(podSkip)).To(BeFalse())

// Reconcile to delete node
node = ExpectNodeExists(ctx, env.Client, node.Name)
Expand All @@ -289,7 +294,7 @@ var _ = Describe("Termination", func() {
Expect(env.Client.Delete(ctx, node)).To(Succeed())
node = ExpectNodeExists(ctx, env.Client, node.Name)
ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation
ExpectSingletonReconciled(ctx, queue)
ExpectObjectReconciled(ctx, env.Client, queue, podEvict)

// Expect node to exist and be draining
ExpectNodeWithNodeClaimDraining(env.Client, node.Name)
Expand Down Expand Up @@ -317,8 +322,9 @@ var _ = Describe("Termination", func() {
ExpectApplied(ctx, env.Client, node, nodeClaim, pod)
Expect(env.Client.Delete(ctx, node)).To(Succeed())
node = ExpectNodeExists(ctx, env.Client, node.Name)

ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation
ExpectSingletonReconciled(ctx, queue)
ExpectObjectReconciled(ctx, env.Client, queue, pod)

// Expect pod with no owner ref to be enqueued for eviction
EventuallyExpectTerminating(ctx, env.Client, pod)
Expand Down Expand Up @@ -385,13 +391,13 @@ var _ = Describe("Termination", func() {
ExpectNodeWithNodeClaimDraining(env.Client, node.Name)

// Expect podNoEvict to be added to the queue
Expect(queue.Has(node, podNoEvict)).To(BeTrue())
Expect(queue.Has(podNoEvict)).To(BeTrue())

// Attempt to evict the pod, but fail to do so
ExpectSingletonReconciled(ctx, queue)
_ = ExpectObjectReconcileFailed(ctx, env.Client, queue, podNoEvict)

// Expect podNoEvict to fail eviction due to PDB, and be retried
Expect(queue.Has(node, podNoEvict)).To(BeTrue())
Expect(queue.Has(podNoEvict)).To(BeTrue())

// Delete pod to simulate successful eviction
ExpectDeleted(ctx, env.Client, podNoEvict)
Expand Down Expand Up @@ -452,16 +458,16 @@ var _ = Describe("Termination", func() {
}
ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node))
ExpectNodeWithNodeClaimDraining(env.Client, node.Name)
for range podGroup {
ExpectSingletonReconciled(ctx, queue)
for _, pod := range podGroup {
ExpectObjectReconciled(ctx, env.Client, queue, pod)
}
// Start draining the pod group, but don't complete it yet
EventuallyExpectTerminating(ctx, env.Client, lo.Map(podGroup, func(p *corev1.Pod, _ int) client.Object { return p })...)

// Look at the next pod group and ensure that none of the pods have started terminating on it
if i != len(podGroups)-1 {
for range podGroups[i+1] {
ExpectSingletonReconciled(ctx, queue)
for _, pod := range podGroups[i+1] {
Expect(queue.Has(pod)).To(BeFalse())
}
ConsistentlyExpectNotTerminating(ctx, env.Client, lo.Map(podGroups[i+1], func(p *corev1.Pod, _ int) client.Object { return p })...)
}
Expand All @@ -487,8 +493,9 @@ var _ = Describe("Termination", func() {
// Trigger Termination Controller
Expect(env.Client.Delete(ctx, node)).To(Succeed())
node = ExpectNodeExists(ctx, env.Client, node.Name)

ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation (non-critical)
ExpectSingletonReconciled(ctx, queue)
ExpectObjectReconciled(ctx, env.Client, queue, podEvict)

// Expect node to exist and be draining
ExpectNodeWithNodeClaimDraining(env.Client, node.Name)
Expand All @@ -500,8 +507,8 @@ var _ = Describe("Termination", func() {
// Expect the critical pods to be evicted and deleted
node = ExpectNodeExists(ctx, env.Client, node.Name)
ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation (critical)
ExpectSingletonReconciled(ctx, queue)
ExpectSingletonReconciled(ctx, queue)
ExpectObjectReconciled(ctx, env.Client, queue, podNodeCritical)
ExpectObjectReconciled(ctx, env.Client, queue, podClusterCritical)

EventuallyExpectTerminating(ctx, env.Client, podNodeCritical, podClusterCritical)
ExpectDeleted(ctx, env.Client, podNodeCritical)
Expand Down Expand Up @@ -535,10 +542,10 @@ var _ = Describe("Termination", func() {
Expect(env.Client.Delete(ctx, node)).To(Succeed())
node = ExpectNodeExists(ctx, env.Client, node.Name)
ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation
ExpectSingletonReconciled(ctx, queue)
ExpectObjectReconciled(ctx, env.Client, queue, podEvict)

// Expect mirror pod to not be queued for eviction
Expect(queue.Has(node, podNoEvict)).To(BeFalse())
Expect(queue.Has(podNoEvict)).To(BeFalse())

// Expect podEvict to be enqueued for eviction then be successful
EventuallyExpectTerminating(ctx, env.Client, podEvict)
Expand Down Expand Up @@ -566,8 +573,8 @@ var _ = Describe("Termination", func() {
Expect(env.Client.Delete(ctx, node)).To(Succeed())
node = ExpectNodeExists(ctx, env.Client, node.Name)
ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation
ExpectSingletonReconciled(ctx, queue)
ExpectSingletonReconciled(ctx, queue)
ExpectObjectReconciled(ctx, env.Client, queue, pods[0])
ExpectObjectReconciled(ctx, env.Client, queue, pods[1])

nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeDrained).IsUnknown()).To(BeTrue())
Expand Down Expand Up @@ -614,8 +621,8 @@ var _ = Describe("Termination", func() {
Expect(env.Client.Delete(ctx, node)).To(Succeed())
node = ExpectNodeExists(ctx, env.Client, node.Name)
ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation
ExpectSingletonReconciled(ctx, queue)
ExpectSingletonReconciled(ctx, queue)
ExpectObjectReconciled(ctx, env.Client, queue, pods[0])
ExpectObjectReconciled(ctx, env.Client, queue, pods[1])

// Expect the pods to be evicted
EventuallyExpectTerminating(ctx, env.Client, pods[0], pods[1])
Expand Down Expand Up @@ -644,8 +651,8 @@ var _ = Describe("Termination", func() {
Expect(env.Client.Delete(ctx, node)).To(Succeed())
node = ExpectNodeExists(ctx, env.Client, node.Name)
ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation
ExpectSingletonReconciled(ctx, queue)
ExpectSingletonReconciled(ctx, queue)
ExpectObjectReconciled(ctx, env.Client, queue, pods[0])
ExpectObjectReconciled(ctx, env.Client, queue, pods[1])

// Expect the pods to be evicted
EventuallyExpectTerminating(ctx, env.Client, pods[0], pods[1])
Expand Down Expand Up @@ -675,7 +682,8 @@ var _ = Describe("Termination", func() {
// Before grace period, node should not delete
Expect(env.Client.Delete(ctx, node)).To(Succeed())
ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation
ExpectSingletonReconciled(ctx, queue)
ExpectObjectReconciled(ctx, env.Client, queue, pod)

ExpectNodeExists(ctx, env.Client, node.Name)
EventuallyExpectTerminating(ctx, env.Client, pod)

Expand Down Expand Up @@ -720,7 +728,7 @@ var _ = Describe("Termination", func() {
ExpectNotFound(ctx, env.Client, node)

// Expect that the old pod's key still exists in the queue
Expect(queue.Has(node, pod)).To(BeTrue())
Expect(queue.Has(pod)).To(BeTrue())

// Re-create the pod and node, it should now have the same name, but a different UUID
node = test.Node(test.NodeOptions{
Expand All @@ -738,7 +746,8 @@ var _ = Describe("Termination", func() {
ExpectApplied(ctx, env.Client, node, pod)

// Trigger eviction queue with the pod key still in it
ExpectSingletonReconciled(ctx, queue)
Expect(queue.Has(pod)).To(BeTrue())
queue.Add(pod)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're adding a pod to the queue when it's already there?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's intentional, we want to verify that when we attempt to add the pod to the queue that it doesn't then get evicted because its already returning true for Has

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is actually making me realize that this change breaks this logic in this code -- it's possible for the following scenario to happen now and us to evict a new pod that shouldn't have been evicted:

  1. We add a pod to the channel
  2. This pod's namespace name is added to the controller workqueue
  3. The pod has previously been evicted and a new pod has launched but we didn't see it -- this means that when we pull the pod from the cache, we actually get the new pod and then we actually evict the new pod

A core tenant of whatever we do here has to be that when we add data to the queue, we also add the UUID of the pod that we pull -- if we don't do that, we have the potential to evict the wrong pod.


Consistently(func(g Gomega) {
g.Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(pod), pod)).To(Succeed())
Expand All @@ -764,6 +773,8 @@ var _ = Describe("Termination", func() {

Expect(env.Client.Delete(ctx, node)).To(Succeed())
ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // DrainInitiation
ExpectObjectReconciled(ctx, env.Client, queue, pod)

ExpectNodeExists(ctx, env.Client, node.Name)
pod = ExpectExists(ctx, env.Client, pod)
Expect(pod.DeletionTimestamp.IsZero()).To(BeFalse())
Expand Down Expand Up @@ -798,8 +809,9 @@ var _ = Describe("Termination", func() {
// The pod should be deleted 60 seconds before the node's TGP expires
fakeClock.Step(175 * time.Second)
ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node))
pod = ExpectExists(ctx, env.Client, pod)
Expect(pod.DeletionTimestamp.IsZero()).To(BeFalse())
ExpectObjectReconciled(ctx, env.Client, queue, pod)
ExpectDeleted(ctx, env.Client, pod)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like I mentioned below -- this actually does a deletion, which I don't think is what we were doing before -- is this what we want? Since we aren't really checking anything here after reconciling


})
Context("VolumeAttachments", func() {
It("should wait for volume attachments", func() {
Expand Down Expand Up @@ -944,29 +956,6 @@ var _ = Describe("Termination", func() {
Expect(ok).To(BeTrue())
Expect(lo.FromPtr(m.GetHistogram().SampleCount)).To(BeNumerically("==", 1))
})
It("should update the eviction queueDepth metric when reconciling pods", func() {
minAvailable := intstr.FromInt32(0)
labelSelector := map[string]string{test.RandomName(): test.RandomName()}
pdb := test.PodDisruptionBudget(test.PDBOptions{
Labels: labelSelector,
// Don't let any pod evict
MinAvailable: &minAvailable,
})
ExpectApplied(ctx, env.Client, pdb, node, nodeClaim)
pods := test.Pods(5, test.PodOptions{NodeName: node.Name, ObjectMeta: metav1.ObjectMeta{
OwnerReferences: defaultOwnerRefs,
Labels: labelSelector,
}})
ExpectApplied(ctx, env.Client, lo.Map(pods, func(p *corev1.Pod, _ int) client.Object { return p })...)

wqDepthBefore, _ := FindMetricWithLabelValues("workqueue_adds_total", map[string]string{"name": "eviction.workqueue"})
Expect(env.Client.Delete(ctx, node)).To(Succeed())
node = ExpectNodeExists(ctx, env.Client, node.Name)
ExpectRequeued(ExpectObjectReconciled(ctx, env.Client, terminationController, node)) // Drain
wqDepthAfter, ok := FindMetricWithLabelValues("workqueue_adds_total", map[string]string{"name": "eviction.workqueue"})
Expect(ok).To(BeTrue())
Expect(lo.FromPtr(wqDepthAfter.GetCounter().Value) - lo.FromPtr(wqDepthBefore.GetCounter().Value)).To(BeNumerically("==", 5))
})
})
})

Expand Down
Loading
Loading