docs: add info about mirror pods #5565
Conversation
- because we use `mirror pods` in our flags but don't really talk about what they mean anywhere
[APPROVALNOTIFIER] This PR is NOT APPROVED.
This pull-request has been approved by: vadasambar. The full list of commands accepted by this bot can be found here.
```diff
@@ -722,6 +722,9 @@ CA respects `nodeSelector` and `requiredDuringSchedulingIgnoredDuringExecution`

 However, CA does not consider "soft" constraints like `preferredDuringSchedulingIgnoredDuringExecution` when selecting node groups. That means that if CA has two or more node groups available for expansion, it will not use soft constraints to pick one node group over another.

+### What are mirror pods?
+Mirror pods are pods which are considered `replicated` i.e., if the node on which the pod is running goes down, the pod can get scheduled on another node.
```
I have deliberately kept the description short here since our internal mechanism for identifying mirror pods is going to change.
Current way of finding mirror pods:
`autoscaler/cluster-autoscaler/utils/drain/drain.go`, lines 96 to 231 in e551e1f:
```go
for _, pod := range podList {
    if pod_util.IsMirrorPod(pod) {
        continue
    }
    // Possibly skip a pod under deletion but only if it was being deleted for long enough
    // to avoid a situation when we delete the empty node immediately after the pod was marked for
    // deletion without respecting any graceful termination.
    if IsPodLongTerminating(pod, currentTime) {
        // pod is being deleted for long enough - no need to care about it.
        continue
    }
    isDaemonSetPod := false
    replicated := false
    safeToEvict := hasSafeToEvictAnnotation(pod)
    terminal := isPodTerminal(pod)
    controllerRef := ControllerRef(pod)
    refKind := ""
    if controllerRef != nil {
        refKind = controllerRef.Kind
    }
    // For now, owner controller must be in the same namespace as the pod
    // so OwnerReference doesn't have its own Namespace field
    controllerNamespace := pod.Namespace
    if refKind == "ReplicationController" {
        if checkReferences {
            rc, err := listers.ReplicationControllerLister().ReplicationControllers(controllerNamespace).Get(controllerRef.Name)
            // Assume a reason for an error is because the RC is either
            // gone/missing or that the rc has too few replicas configured.
            // TODO: replace the minReplica check with pod disruption budget.
            if err == nil && rc != nil {
                if rc.Spec.Replicas != nil && *rc.Spec.Replicas < minReplica {
                    return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: MinReplicasReached}, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d",
                        pod.Namespace, pod.Name, rc.Spec.Replicas, minReplica)
                }
                replicated = true
            } else {
                return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: ControllerNotFound}, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err)
            }
        } else {
            replicated = true
        }
    } else if pod_util.IsDaemonSetPod(pod) {
        isDaemonSetPod = true
        // don't have listener for other DaemonSet kind
        // TODO: we should use a generic client for checking the reference.
        if checkReferences && refKind == "DaemonSet" {
            _, err := listers.DaemonSetLister().DaemonSets(controllerNamespace).Get(controllerRef.Name)
            if apierrors.IsNotFound(err) {
                return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: ControllerNotFound}, fmt.Errorf("daemonset for %s/%s is not present, err: %v", pod.Namespace, pod.Name, err)
            } else if err != nil {
                return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: UnexpectedError}, fmt.Errorf("error when trying to get daemonset for %s/%s , err: %v", pod.Namespace, pod.Name, err)
            }
        }
    } else if refKind == "Job" {
        if checkReferences {
            job, err := listers.JobLister().Jobs(controllerNamespace).Get(controllerRef.Name)
            // Assume the only reason for an error is because the Job is
            // gone/missing, not for any other cause. TODO(mml): something more
            // sophisticated than this
            if err == nil && job != nil {
                replicated = true
            } else {
                return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: ControllerNotFound}, fmt.Errorf("job for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err)
            }
        } else {
            replicated = true
        }
    } else if refKind == "ReplicaSet" {
        if checkReferences {
            rs, err := listers.ReplicaSetLister().ReplicaSets(controllerNamespace).Get(controllerRef.Name)
            // Assume the only reason for an error is because the RS is
            // gone/missing, not for any other cause. TODO(mml): something more
            // sophisticated than this
            if err == nil && rs != nil {
                if rs.Spec.Replicas != nil && *rs.Spec.Replicas < minReplica {
                    return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: MinReplicasReached}, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d",
                        pod.Namespace, pod.Name, rs.Spec.Replicas, minReplica)
                }
                replicated = true
            } else {
                return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: ControllerNotFound}, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err)
            }
        } else {
            replicated = true
        }
    } else if refKind == "StatefulSet" {
        if checkReferences {
            ss, err := listers.StatefulSetLister().StatefulSets(controllerNamespace).Get(controllerRef.Name)
            // Assume the only reason for an error is because the StatefulSet is
            // gone/missing, not for any other cause. TODO(mml): something more
            // sophisticated than this
            if err == nil && ss != nil {
                replicated = true
            } else {
                return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: ControllerNotFound}, fmt.Errorf("statefulset for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err)
            }
        } else {
            replicated = true
        }
    }
    if isDaemonSetPod {
        daemonSetPods = append(daemonSetPods, pod)
        continue
    }
    if !safeToEvict && !terminal {
        if !replicated {
            return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: NotReplicated}, fmt.Errorf("%s/%s is not replicated", pod.Namespace, pod.Name)
        }
        if pod.Namespace == "kube-system" && skipNodesWithSystemPods {
            hasPDB, err := checkKubeSystemPDBs(pod, kubeSystemPDBs)
            if err != nil {
                return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: UnexpectedError}, fmt.Errorf("error matching pods to pdbs: %v", err)
            }
            if !hasPDB {
                return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: UnmovableKubeSystemPod}, fmt.Errorf("non-daemonset, non-mirrored, non-pdb-assigned kube-system pod present: %s", pod.Name)
            }
        }
        if HasLocalStorage(pod) && skipNodesWithLocalStorage {
            return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: LocalStorageRequested}, fmt.Errorf("pod with local storage present: %s", pod.Name)
        }
        if hasNotSafeToEvictAnnotation(pod) {
            return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: NotSafeToEvictAnnotation}, fmt.Errorf("pod annotated as not safe to evict present: %s", pod.Name)
        }
    }
    pods = append(pods, pod)
}
return pods, daemonSetPods, nil, nil
```
New way of finding mirror pods we are working on: #5507
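For context, the very first check in the snippet above skips mirror pods via `pod_util.IsMirrorPod`. As a rough illustration (not necessarily the exact implementation in `utils/pod/pod.go`), such a check can key off the `kubernetes.io/config.mirror` annotation that the kubelet sets on the mirror pods it creates (the same `ConfigMirrorAnnotationKey` that shows up in `utils/test/test_utils.go` later in this thread):

```go
package pod

import (
    apiv1 "k8s.io/api/core/v1"
    kube_types "k8s.io/kubernetes/pkg/kubelet/types"
)

// IsMirrorPod reports whether the pod is a mirror pod, i.e. the API-server
// representation of a static pod managed directly by the kubelet. The kubelet
// marks the mirror pods it creates with the kubernetes.io/config.mirror annotation.
func IsMirrorPod(pod *apiv1.Pod) bool {
    if pod.ObjectMeta.Annotations == nil {
        return false
    }
    _, ok := pod.ObjectMeta.Annotations[kube_types.ConfigMirrorAnnotationKey]
    return ok
}
```

Pods matching this check are skipped outright by the drain logic above, so they never end up in the returned list of pods to evict.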
Hey @vadasambar, just took a look at the PR. Thanks for exposing that context! My 2 cents:

- According to my understanding, a mirror Pod is a Kubernetes-wide term, not autoscaler-specific, as explained in the guide https://kubernetes.io/docs/tasks/configure-pod-container/static-pod/. Essentially, a mirror Pod is a Pod on the API server that represents a static Pod configured with the kubelet.
- A mirror Pod is not controlled by the control plane, thus it is not scheduled (please correct me if I am wrong).
- If the node of the Pod goes down, no control-plane controller will replicate it on another node.

IMHO the proposed definition is not precise and we should rather stick to the Kubernetes website definition. We may point users to the Kubernetes guide to better understand what a mirror Pod is, and the section you propose to add could be reworked into a "How the CA treats mirror Pods" section. In particular, we may explain wrt mirror Pods:

- The cluster autoscaler can be configured to take mirror Pods into consideration or to ignore them when calculating the utilization of a node (see the sketch below).
- The mirror Pods of a node will not prevent the autoscaler from removing the node.
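To make the first point concrete, here is a rough, hypothetical sketch of how mirror pods could be excluded from a node's CPU utilization sum when a flag like `--ignore-mirror-pods-utilization` is set. `sumCPURequestsMillis` is an illustrative helper, not an actual autoscaler function; the real calculation lives in `simulator/utilization/info.go`:

```go
package utilization

import (
    apiv1 "k8s.io/api/core/v1"
)

// sumCPURequestsMillis sums the CPU requests (in millicores) of the given pods,
// optionally skipping mirror pods, in the spirit of --ignore-mirror-pods-utilization.
// isMirrorPod is passed in as a stand-in for pod_util.IsMirrorPod.
func sumCPURequestsMillis(pods []*apiv1.Pod, ignoreMirrorPods bool, isMirrorPod func(*apiv1.Pod) bool) int64 {
    var total int64
    for _, pod := range pods {
        if ignoreMirrorPods && isMirrorPod(pod) {
            // Mirror (static) pods are managed directly by the kubelet,
            // so they are factored out of the utilization calculation here.
            continue
        }
        for _, container := range pod.Spec.Containers {
            total += container.Resources.Requests.Cpu().MilliValue()
        }
    }
    return total
}
```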
WDYT?
@gregth thanks for the comment and for explaining everything to me. I feel quite stupid after creating this PR. I thought mirror pod could be used interchangeably with replicated pod (I thought mirror pod was lingo specific to the cluster-autoscaler). I couldn't be more wrong.

> According to my understanding, a mirror Pod is a Kubernetes-wide term, not autoscaler-specific, as explained in the guide https://kubernetes.io/docs/tasks/configure-pod-container/static-pod/. Essentially, a mirror Pod is a Pod on the API server that represents a static Pod configured with the kubelet.

You are right.

> If the node of the Pod goes down, no control-plane controller will replicate it on another node.

You are right.

> IMHO the proposed definition is not precise and we should rather stick to the Kubernetes website definition. We may point users to the Kubernetes guide to better understand what a mirror Pod is, and the section you propose to add could be reworked into a "How the CA treats mirror Pods" section. In particular, we may explain wrt mirror Pods:

I agree with linking to the official definition. I don't think the section I wrote in this PR is of much use now. Having a section around "How the CA treats mirror Pods" seems valuable to me. Looks like I can change this PR into:

- Removing the section I added
- Linking the official docs for mirror pods wherever we have used the term `mirror pod` in the FAQ
- Adding a section around "How the CA treats mirror Pods"
P.S.

> I have deliberately kept the description short here since our internal mechanism for identifying mirror pods is going to change.
> Current way of finding mirror pods:

This ^ is wrong.

> New way of finding mirror pods we are working on: #5507

This ^ is wrong too.

Correct sentence:

We are working on a new way to find pods which should be deleted on node drain.

Current way of finding such pods:
`autoscaler/cluster-autoscaler/utils/drain/drain.go`, lines 96 to 231 in e551e1f (the same snippet quoted above).
New way of finding such pods: #5507
Maybe we can add a link to https://kubernetes.io/docs/tasks/configure-pod-container/static-pod/ in places where 'mirror pod' is used? Would that help avoid confusion? It's a kubernetes-wide term, but it's pretty niche and I don't think many people are familiar with it.
I think that would reduce confusion. It seems like we are using the term `mirror pods` in 27 different places:
```console
suraj@suraj:~/sandbox/autoscaler/cluster-autoscaler$ grep -R "mirror" . --exclude-dir=vendor | grep "pod"
./main.go: ignoreMirrorPodsUtilization = flag.Bool("ignore-mirror-pods-utilization", false,
./main.go: skipNodesWithSystemPods = flag.Bool("skip-nodes-with-system-pods", true, "If true cluster autoscaler will never delete nodes with pods from kube-system (except for DaemonSet or mirror pods)")
./FAQ.md:* The sum of cpu and memory requests of all pods running on this node (DaemonSet pods and Mirror pods are included by default but this is configurable with `--ignore-daemonsets-utilization` and `--ignore-mirror-pods-utilization` flags) is smaller
./FAQ.md:| `ignore-mirror-pods-utilization` | Whether Mirror pods will be ignored when calculating resource utilization for scaling down | false
./FAQ.md:| `skip-nodes-with-system-pods` | If true cluster autoscaler will never delete nodes with pods from kube-system (except for DaemonSet or mirror pods) | true
./utils/test/test_utils.go:// SetMirrorPodSpec sets pod spec to make it a mirror pod
./utils/test/test_utils.go: pod.ObjectMeta.Annotations[kube_types.ConfigMirrorAnnotationKey] = "mirror"
./utils/pod/pod.go:// IsMirrorPod checks whether the pod is a mirror pod.
./utils/pod/pod_test.go: name: "filter-out single mirror pod",
./utils/drain/drain.go: // UnmovableKubeSystemPod - pod is blocking scale down because it's a non-daemonset, non-mirrored, non-pdb-assigned kube-system pod.
./utils/drain/drain.go: return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: UnmovableKubeSystemPod}, fmt.Errorf("non-daemonset, non-mirrored, non-pdb-assigned kube-system pod present: %s", pod.Name)
./config/autoscaling_options.go: // SkipNodesWithSystemPods tells if nodes with pods from kube-system should be deleted (except for DaemonSet or mirror pods)
./simulator/nodes_test.go: name: "node with non-DS/mirror pods",
./simulator/nodes_test.go: name: "node with a mirror pod",
./simulator/nodes_test.go: name: "node with a deleted mirror pod",
./simulator/utilization/info.go: // factor mirror pods out of the utilization calculations
./simulator/utilization/info_test.go: nodeInfo = newNodeInfo(node, pod, pod, pod2, mirrorPod)
./simulator/utilization/info_test.go: nodeInfo = newNodeInfo(node, pod, pod2, mirrorPod)
./simulator/utilization/info_test.go: nodeInfo = newNodeInfo(node, pod, mirrorPod, daemonSetPod3)
./simulator/drain.go: // SkipNodesWithSystemPods tells if nodes with pods from kube-system should be deleted (except for DaemonSet or mirror pods)
./simulator/nodes.go: // Add scheduled mirror and DS pods
./core/scaledown/planner/planner_test.go: name: "recently evicted mirror pod, eligible",
./core/scaledown/actuation/drain.go:// DrainNode works like DrainNodeWithPods, but lists of pods to evict don't have to be provided. All non-mirror, non-DS pods on the
./core/scaledown/actuation/drain_test.go: "mirror pods are never returned": {
./core/scaledown/actuation/drain_test.go: pods: []*apiv1.Pod{mirrorPod("pod-1"), mirrorPod("pod-2")},
./core/scaledown/actuation/drain_test.go: mirrorPod("mirror-pod-1"), mirrorPod("mirror-pod-2"),
./README.md:namespace (Cluster Autoscaler doesn't scale down node with non-mirrored kube-system pods running
suraj@suraj:~/sandbox/autoscaler/cluster-autoscaler$ grep -R "mirror" . --exclude-dir=vendor | grep "pod" | wc -l
27
```

Based on the latest master commit:

```console
suraj@suraj:~/sandbox/autoscaler/cluster-autoscaler$ git log
commit 63b334f81a8820511c835401e0133d242d84da08 (HEAD -> master, upstream/master)
```
Based on the discussion, it seems like we can do:
- add a link to the official definition of mirror pod OR the static pod docs in the FAQ wherever mirror pod is mentioned
- add a link to the static pod docs wherever mirror pod is mentioned in the code
- add a section around how CA treats mirror/static pods when draining a node
I will close this PR and create a separate issue for this. Thank you for the feedback.
What type of PR is this?
/kind documentation
What this PR does / why we need it:

We use the wording `mirror pods` in our flags but don't really talk about what they mean anywhere; more can be found in the FAQ here. This can be confusing for end users.
Which issue(s) this PR fixes:
Fixes #5566
Special notes for your reviewer:
Does this PR introduce a user-facing change?
NONE