Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ _tmp/
vendordiff.patch
.idea/
*.code-workspace
.vscode/
13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ parameters associated with the strategies can be configured too. By default, all

The policy also includes common configuration for all the strategies:
- `nodeSelector` - limiting the nodes which are processed
- `evictLocalStoragePods` - allowing to evict pods with local storage
- `evictLocalStoragePods` - allows eviction of pods with local storage
- `evictSystemCriticalPods` - [Warning: Will evict Kubernetes system pods] allows eviction of pods with any priority, including system pods like kube-dns
- `ignorePvcPods` - set whether PVC pods should be evicted or ignored (defaults to `false`)
- `maxNoOfPodsToEvictPerNode` - maximum number of pods evicted from each node (summed through all strategies)

Expand All @@ -122,6 +123,7 @@ apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
nodeSelector: prod=dev
evictLocalStoragePods: true
evictSystemCriticalPods: true
maxNoOfPodsToEvictPerNode: 40
ignorePvcPods: false
strategies:
Expand Down Expand Up @@ -480,6 +482,9 @@ All strategies are able to configure a priority threshold, only pods under the t
specify this threshold by setting `thresholdPriorityClassName`(setting the threshold to the value of the given
priority class) or `thresholdPriority`(directly setting the threshold) parameters. By default, this threshold
is set to the value of `system-cluster-critical` priority class.

Note: Setting `evictSystemCriticalPods` to true disables priority filtering entirely.

E.g.

Setting `thresholdPriority`
Expand Down Expand Up @@ -547,12 +552,12 @@ strategies:

When the descheduler decides to evict pods from a node, it employs the following general mechanism:

* [Critical pods](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/) (with priorityClassName set to system-cluster-critical or system-node-critical) are never evicted.
* [Critical pods](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/) (with priorityClassName set to system-cluster-critical or system-node-critical) are never evicted (unless `evictSystemCriticalPods: true` is set).
* Pods (static or mirrored pods or stand alone pods) not part of an ReplicationController, ReplicaSet(Deployment), StatefulSet, or Job are
never evicted because these pods won't be recreated.
* Pods associated with DaemonSets are never evicted.
* Pods with local storage are never evicted (unless `evictLocalStoragePods: true` is set)
* Pods with PVCs are evicted unless `ignorePvcPods: true` is set.
* Pods with local storage are never evicted (unless `evictLocalStoragePods: true` is set).
* Pods with PVCs are evicted (unless `ignorePvcPods: true` is set).
* In `LowNodeUtilization` and `RemovePodsViolatingInterPodAntiAffinity`, pods are evicted by their priority from low to high, and if they have same priority,
best effort pods are evicted before burstable and guaranteed pods.
* All types of pods with the annotation `descheduler.alpha.kubernetes.io/evict` are eligible for eviction. This
Expand Down
3 changes: 3 additions & 0 deletions pkg/api/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ type DeschedulerPolicy struct {
// EvictLocalStoragePods allows pods using local storage to be evicted.
EvictLocalStoragePods *bool

// EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods)
EvictSystemCriticalPods *bool

// IgnorePVCPods prevents pods with PVCs from being evicted.
IgnorePVCPods *bool

Expand Down
3 changes: 3 additions & 0 deletions pkg/api/v1alpha1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ type DeschedulerPolicy struct {
// EvictLocalStoragePods allows pods using local storage to be evicted.
EvictLocalStoragePods *bool `json:"evictLocalStoragePods,omitempty"`

// EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods)
EvictSystemCriticalPods *bool `json:"evictSystemCriticalPods,omitempty"`

// IgnorePVCPods prevents pods with PVCs from being evicted.
IgnorePVCPods *bool `json:"ignorePvcPods,omitempty"`

Expand Down
9 changes: 9 additions & 0 deletions pkg/descheduler/descheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,14 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
evictLocalStoragePods = *deschedulerPolicy.EvictLocalStoragePods
}

evictSystemCriticalPods := false
if deschedulerPolicy.EvictSystemCriticalPods != nil {
evictSystemCriticalPods = *deschedulerPolicy.EvictSystemCriticalPods
if evictSystemCriticalPods {
klog.V(1).InfoS("Warning: EvictSystemCriticalPods is set to True. This could cause eviction of Kubernetes system pods.")
}
}

ignorePvcPods := false
if deschedulerPolicy.IgnorePVCPods != nil {
ignorePvcPods = *deschedulerPolicy.IgnorePVCPods
Expand Down Expand Up @@ -124,6 +132,7 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
maxNoOfPodsToEvictPerNode,
nodes,
evictLocalStoragePods,
evictSystemCriticalPods,
ignorePvcPods,
)

Expand Down
111 changes: 45 additions & 66 deletions pkg/descheduler/evictions/evictions.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,14 @@ const (
type nodePodEvictedCount map[*v1.Node]int

type PodEvictor struct {
client clientset.Interface
policyGroupVersion string
dryRun bool
maxPodsToEvictPerNode int
nodepodCount nodePodEvictedCount
evictLocalStoragePods bool
ignorePvcPods bool
client clientset.Interface
policyGroupVersion string
dryRun bool
maxPodsToEvictPerNode int
nodepodCount nodePodEvictedCount
evictLocalStoragePods bool
evictSystemCriticalPods bool
ignorePvcPods bool
}

func NewPodEvictor(
Expand All @@ -62,6 +63,7 @@ func NewPodEvictor(
maxPodsToEvictPerNode int,
nodes []*v1.Node,
evictLocalStoragePods bool,
evictSystemCriticalPods bool,
ignorePvcPods bool,
) *PodEvictor {
var nodePodCount = make(nodePodEvictedCount)
Expand All @@ -71,13 +73,14 @@ func NewPodEvictor(
}

return &PodEvictor{
client: client,
policyGroupVersion: policyGroupVersion,
dryRun: dryRun,
maxPodsToEvictPerNode: maxPodsToEvictPerNode,
nodepodCount: nodePodCount,
evictLocalStoragePods: evictLocalStoragePods,
ignorePvcPods: ignorePvcPods,
client: client,
policyGroupVersion: policyGroupVersion,
dryRun: dryRun,
maxPodsToEvictPerNode: maxPodsToEvictPerNode,
nodepodCount: nodePodCount,
evictLocalStoragePods: evictLocalStoragePods,
evictSystemCriticalPods: evictSystemCriticalPods,
ignorePvcPods: ignorePvcPods,
}
}

Expand Down Expand Up @@ -188,53 +191,65 @@ func (pe *PodEvictor) Evictable(opts ...func(opts *Options)) *evictable {
}

ev := &evictable{}
if !pe.evictLocalStoragePods {
if !pe.evictSystemCriticalPods {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if IsPodWithLocalStorage(pod) {
return fmt.Errorf("pod has local storage and descheduler is not configured with --evict-local-storage-pods")
// Moved from IsEvictable function to allow for disabling
if utils.IsCriticalPriorityPod(pod) {
return fmt.Errorf("pod has system critical priority")
}
return nil
})

if options.priority != nil {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if IsPodEvictableBasedOnPriority(pod, *options.priority) {
return nil
}
return fmt.Errorf("pod has higher priority than specified priority class threshold")
})
}
}
if pe.ignorePvcPods {
if !pe.evictLocalStoragePods {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if IsPodWithPVC(pod) {
return fmt.Errorf("pod has a PVC and descheduler is configured to ignore PVC pods")
if utils.IsPodWithLocalStorage(pod) {
return fmt.Errorf("pod has local storage and descheduler is not configured with evictLocalStoragePods")
}
return nil
})
}
if options.priority != nil {
if pe.ignorePvcPods {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if IsPodEvictableBasedOnPriority(pod, *options.priority) {
return nil
if utils.IsPodWithPVC(pod) {
return fmt.Errorf("pod has a PVC and descheduler is configured to ignore PVC pods")
}
return fmt.Errorf("pod has higher priority than specified priority class threshold")
return nil
})
}

return ev
}

// IsEvictable decides when a pod is evictable
func (ev *evictable) IsEvictable(pod *v1.Pod) bool {
checkErrs := []error{}
if IsCriticalPod(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod is critical"))
}

ownerRefList := podutil.OwnerRef(pod)
if IsDaemonsetPod(ownerRefList) {
if utils.IsDaemonsetPod(ownerRefList) {
checkErrs = append(checkErrs, fmt.Errorf("pod is a DaemonSet pod"))
}

if len(ownerRefList) == 0 {
checkErrs = append(checkErrs, fmt.Errorf("pod does not have any ownerrefs"))
}

if IsMirrorPod(pod) {
if utils.IsMirrorPod(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod is a mirror pod"))
}

if utils.IsStaticPod(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod is a static pod"))
}

for _, c := range ev.constraints {
if err := c(pod); err != nil {
checkErrs = append(checkErrs, err)
Expand All @@ -245,25 +260,8 @@ func (ev *evictable) IsEvictable(pod *v1.Pod) bool {
klog.V(4).InfoS("Pod lacks an eviction annotation and fails the following checks", "pod", klog.KObj(pod), "checks", errors.NewAggregate(checkErrs).Error())
return false
}
return true
}

func IsCriticalPod(pod *v1.Pod) bool {
return utils.IsCriticalPod(pod)
}

func IsDaemonsetPod(ownerRefList []metav1.OwnerReference) bool {
for _, ownerRef := range ownerRefList {
if ownerRef.Kind == "DaemonSet" {
return true
}
}
return false
}

// IsMirrorPod checks whether the pod is a mirror pod.
func IsMirrorPod(pod *v1.Pod) bool {
return utils.IsMirrorPod(pod)
return true
}

// HaveEvictAnnotation checks if the pod have evict annotation
Expand All @@ -272,25 +270,6 @@ func HaveEvictAnnotation(pod *v1.Pod) bool {
return found
}

func IsPodWithLocalStorage(pod *v1.Pod) bool {
for _, volume := range pod.Spec.Volumes {
if volume.HostPath != nil || volume.EmptyDir != nil {
return true
}
}

return false
}

func IsPodWithPVC(pod *v1.Pod) bool {
for _, volume := range pod.Spec.Volumes {
if volume.PersistentVolumeClaim != nil {
return true
}
}
return false
}

// IsPodEvictableBasedOnPriority checks if the given pod is evictable based on priority resolved from pod Spec.
func IsPodEvictableBasedOnPriority(pod *v1.Pod, priority int32) bool {
return pod.Spec.Priority == nil || *pod.Spec.Priority < priority
Expand Down
Loading