From 25dc614b7afca618b584dab85e8bc3e22a9233b0 Mon Sep 17 00:00:00 2001 From: Simon Lundkvist Date: Tue, 4 Jun 2024 13:18:00 +0200 Subject: [PATCH] apps: upgrade kured --- helmfile.d/upstream/index.yaml | 2 +- .../upstream/kubereboot/kured/Chart.yaml | 6 +- .../upstream/kubereboot/kured/README.md | 35 ++++++++-- .../kubereboot/kured/templates/daemonset.yaml | 64 +++++++++++++++++-- .../kubereboot/kured/templates/service.yaml | 2 +- .../kured/templates/servicemonitor.yaml | 2 +- .../kubereboot/kured/values.minikube.yaml | 1 + .../upstream/kubereboot/kured/values.yaml | 51 ++++++++++++++- .../common/kured.yaml.gotmpl | 7 ++ 9 files changed, 150 insertions(+), 20 deletions(-) diff --git a/helmfile.d/upstream/index.yaml b/helmfile.d/upstream/index.yaml index 197d149d95..143196f862 100644 --- a/helmfile.d/upstream/index.yaml +++ b/helmfile.d/upstream/index.yaml @@ -52,7 +52,7 @@ charts: kokuwa/fluentd-elasticsearch: 13.10.0 - kubereboot/kured: 4.5.1 + kubereboot/kured: 5.4.5 kubernetes-external-dns/external-dns: 1.14.4 kubernetes-ingress-nginx/ingress-nginx: 4.9.1 diff --git a/helmfile.d/upstream/kubereboot/kured/Chart.yaml b/helmfile.d/upstream/kubereboot/kured/Chart.yaml index 16793e5e90..d40694a3d5 100644 --- a/helmfile.d/upstream/kubereboot/kured/Chart.yaml +++ b/helmfile.d/upstream/kubereboot/kured/Chart.yaml @@ -1,14 +1,14 @@ apiVersion: v1 -appVersion: 1.13.1 +appVersion: 1.15.1 description: A Helm chart for kured home: https://github.com/kubereboot/kured icon: https://raw.githubusercontent.com/kubereboot/website/main/static/img/kured.png maintainers: - email: christian.kotzbauer@gmail.com - name: ckotzbauer + name: chopf - email: david@davidkarlsen.com name: davidkarlsen name: kured sources: - https://github.com/kubereboot/kured -version: 4.5.1 +version: 5.4.5 diff --git a/helmfile.d/upstream/kubereboot/kured/README.md b/helmfile.d/upstream/kubereboot/kured/README.md index 08f1c79166..eec7f9dd90 100644 --- a/helmfile.d/upstream/kubereboot/kured/README.md +++ b/helmfile.d/upstream/kubereboot/kured/README.md @@ -5,6 +5,7 @@ This chart installs the "Kubernetes Reboot Daemon" using the Helm Package Manage ## Prerequisites - Kubernetes 1.9+ +- Helm 3.8.0+ (to pull the chart from the OCI registry) ## Installing the Chart To install the chart with the release name `my-release`: @@ -13,6 +14,12 @@ $ helm repo add kubereboot https://kubereboot.github.io/charts $ helm install my-release kubereboot/kured ``` +You can also pull the helm chart from the OCI registry `ghcr.io`: + +```bash +$ helm install my-release oci://ghcr.io/kubereboot/charts/kured +``` + ## Uninstalling the Chart To uninstall/delete the `my-release` deployment: ```bash @@ -23,6 +30,13 @@ The command removes all the Kubernetes components associated with the chart and ## Upgrade Notes +### From 4.x to 5.x + +We improved two security-related default-values: +- `hostNetwork` is set to `false` by default now. +- `readOnlyRootFilesystem` is set to `true` by default now. +Both parameters can be configured to its old values from 4.x + ### From 3.x to 4.x We have migrated the code and its release artifacts (helm charts, docker images, manifests) to an @@ -64,33 +78,42 @@ The following changes have been made compared to the stable chart: | Config | Description | Default | | ------ | ----------- | ------- | | `image.repository` | Image repository | `ghcr.io/kubereboot/kured`| -| `image.tag` | Image tag | `1.13.1` | +| `image.tag` | Image tag | `1.15.1` | | `image.pullPolicy` | Image pull policy | `IfNotPresent` | | `image.pullSecrets` | Image pull secrets | `[]` | +| `revisionHistoryLimit` | Number of old history to retain to allow rollback | `10` | | `updateStrategy` | Daemonset update strategy | `RollingUpdate` | | `maxUnavailable` | The max pods unavailable during a rolling update | `1` | | `podAnnotations` | Annotations to apply to pods (eg to add Prometheus annotations) | `{}` | | `dsAnnotations` | Annotations to apply to the kured DaemonSet | `{}` | | `extraArgs` | Extra arguments to pass to `/usr/bin/kured`. See below. | `{}` | | `extraEnvVars` | Array of environment variables to pass to the daemonset. | `{}` | +| `metricsHost` | Host to expose the metrics endpoint. | `""` | +| `metricsPort` | Port to expose the metrics endpoint. | `8080` | +| `configuration.useRebootSentinelHostPath` | Controls whether the chart uses a hostPath to read the sentinel file. | `true` | | `configuration.lockTtl` | cli-parameter `--lock-ttl` | `0` | | `configuration.lockReleaseDelay` | cli-parameter `--lock-release-delay` | `0` | | `configuration.alertFilterRegexp` | cli-parameter `--alert-filter-regexp` | `""` | | `configuration.alertFiringOnly` | cli-parameter `--alert-firing-only` | `false` | +| `configuration.alertFilterMatchOnly` | cli-parameter `--alert-filter-match-only` | `false` | | `configuration.blockingPodSelector` | Array of selectors for multiple cli-parameters `--blocking-pod-selector` | `[]` | | `configuration.endTime` | cli-parameter `--end-time` | `""` | | `configuration.lockAnnotation` | cli-parameter `--lock-annotation` | `""` | | `configuration.period` | cli-parameter `--period` | `""` | | `configuration.forceReboot` | cli-parameter `--force-reboot` | `false` | +| `configuration.drainDelay` | cli-parameter `--drain-delay` | `0` | | `configuration.drainGracePeriod` | cli-parameter `--drain-grace-period` | `""` | | `configuration.drainTimeout` | cli-parameter `--drain-timeout` | `""` | +| `configuration.drainPodSelector` | cli-parameter `--drain-pod-selector` | `""` | | `configuration.skipWaitForDeleteTimeout` | cli-parameter `--skip-wait-for-delete-timeout` | `""` | | `configuration.prometheusUrl` | cli-parameter `--prometheus-url` | `""` | | `configuration.rebootDays` | Array of days for multiple cli-parameters `--reboot-days` | `[]` | | `configuration.rebootSentinel` | cli-parameter `--reboot-sentinel` | `""` | -| `configuration.rebootSentinelCommand` | cli-parameter `--reboot-sentinel-command` | `""` | +| `configuration.rebootSentinelCommand` | Configure your own reboot command to run on the node host OS. Requires `configuration.useRebootSentinelHostPath` to be set to false. `--reboot-sentinel-command` | `""` | | `configuration.rebootCommand` | cli-parameter `--reboot-command` | `""` | | `configuration.rebootDelay` | cli-parameter `--reboot-delay` | `""` | +| `configuration.rebootMethod` | cli-parameter `--reboot-method` | `""` | +| `configuration.rebootSignal` | cli-parameter `--reboot-signal` | `39` (SIGRTMIN+5) | | `configuration.slackChannel` | cli-parameter `--slack-channel`. Passed through `tpl` | `""` | | `configuration.slackHookUrl` | cli-parameter `--slack-hook-url`. Passed through `tpl` | `""` | | `configuration.slackUsername` | cli-parameter `--slack-username`. Passed through `tpl` | `""` | @@ -105,12 +128,13 @@ The following changes have been made compared to the stable chart: | `configuration.preferNoScheduleTaint` | Taint name applied during pending node reboot | `""` | | `configuration.preRebootNodeLabels` | Array of key-value-pairs to add to nodes before cordoning for multiple cli-parameters `--pre-reboot-node-labels` | `[]` | | `configuration.postRebootNodeLabels` | Array of key-value-pairs to add to nodes after uncordoning for multiple cli-parameters `--post-reboot-node-labels` | `[]` | +| `configuration.concurrency` | cli-parameter `--concurrency` | `1` | | `rbac.create` | Create RBAC roles | `true` | | `serviceAccount.create` | Create a service account | `true` | | `serviceAccount.name` | Service account name to create (or use if `serviceAccount.create` is false) | (chart fullname) | | `podSecurityPolicy.create` | Create podSecurityPolicy | `false` | | `containerSecurityContext.privileged ` | Enables `privileged` in container-specific security context | `true` | -| `containerSecurityContext.allowPrivilegeEscalation`| Enables `allowPrivilegeEscalation` in container-specific security context. If not set it won't be configured. | | +| `containerSecurityContext.readOnlyRootFilesystem`| Enables `readOnlyRootFilesystem` in container-specific security context. If not set it won't be configured. | `true` | | `resources` | Resources requests and limits. | `{}` | | `metrics.create` | Create a ServiceMonitor for prometheus-operator | `false` | | `metrics.namespace` | The namespace to create the ServiceMonitor in | `""` | @@ -121,11 +145,13 @@ The following changes have been made compared to the stable chart: | `service.name ` | Service name for the metrics endpoint | `""` | | `service.port` | Port of the service to expose | `8080` | | `service.annotations` | Annotations to apply to the service (eg to add Prometheus annotations) | `{}` | +| `livenessProbe` | Liveness probe for pods | `{"httpGet":{"path":"/metrics","port":"metrics","scheme":"HTTP"},"initialDelaySeconds":10,"periodSeconds":30,"timeoutSeconds":5,"successThreshold":1,"failureThreshold":5}` | +| `readinessProbe` | Readiness probe for pods | `{"httpGet":{"path":"/metrics","port":"metrics","scheme":"HTTP"},"initialDelaySeconds":10,"periodSeconds":30,"timeoutSeconds":5,"successThreshold":1,"failureThreshold":5}` | | `podLabels` | Additional labels for pods (e.g. CostCenter=IT) | `{}` | | `priorityClassName` | Priority Class to be used by the pods | `""` | | `tolerations` | Tolerations to apply to the daemonset (eg to allow running on master) | `[{"key": "node-role.kubernetes.io/control-plane", "effect": "NoSchedule"}]` for Kubernetes 1.24.0 and greater, otherwise `[{"key": "node-role.kubernetes.io/master", "effect": "NoSchedule"}]`| | `affinity` | Affinity for the daemonset (ie, restrict which nodes kured runs on) | `{}` | -| `hostNetwork` | Pod uses the host network instead of the cluster network | `true` | +| `hostNetwork` | Pod uses the host network instead of the cluster network | `false` | | `nodeSelector` | Node Selector for the daemonset (ie, restrict which nodes kured runs on) | `{ "kubernetes.io/os": "linux" }` | | `volumeMounts` | Maps of volumes mount to mount | `{}` | | `volumes` | Maps of volumes to mount | `{}` | @@ -138,7 +164,6 @@ extraArgs: ``` becomes `/usr/bin/kured ... --foo=1 --bar-baz=2`. - ## Prometheus Metrics Kured exposes a single prometheus metric indicating whether a reboot is required or not (see [kured docs](https://github.com/kubereboot/kured#prometheus-metrics)) for details. diff --git a/helmfile.d/upstream/kubereboot/kured/templates/daemonset.yaml b/helmfile.d/upstream/kubereboot/kured/templates/daemonset.yaml index 353be4ab08..050aed5552 100644 --- a/helmfile.d/upstream/kubereboot/kured/templates/daemonset.yaml +++ b/helmfile.d/upstream/kubereboot/kured/templates/daemonset.yaml @@ -12,6 +12,7 @@ metadata: {{- end }} {{- end }} spec: + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} updateStrategy: type: {{ .Values.updateStrategy }} {{- if eq .Values.updateStrategy "RollingUpdate"}} @@ -65,6 +66,12 @@ spec: args: - --ds-name={{ template "kured.fullname" . }} - --ds-namespace={{ .Release.Namespace }} + {{- if .Values.configuration.metricsHost }} + - --metrics-host={{ .Values.configuration.metricsHost }} + {{- end }} + {{- if .Values.configuration.metricsPort }} + - --metrics-port={{ .Values.configuration.metricsPort }} + {{- end }} {{- if .Values.configuration.lockTtl }} - --lock-ttl={{ .Values.configuration.lockTtl }} {{- end }} @@ -77,6 +84,9 @@ spec: {{- if .Values.configuration.alertFiringOnly }} - --alert-firing-only={{ .Values.configuration.alertFiringOnly }} {{- end }} + {{- if .Values.configuration.alertFilterMatchOnly }} + - --alert-filter-match-only={{ .Values.configuration.alertFilterMatchOnly }} + {{- end }} {{- range .Values.configuration.blockingPodSelector }} - --blocking-pod-selector={{ . }} {{- end }} @@ -95,6 +105,12 @@ spec: {{- if .Values.configuration.drainGracePeriod }} - --drain-grace-period={{ .Values.configuration.drainGracePeriod }} {{- end }} + {{- if .Values.configuration.drainPodSelector }} + - --drain-pod-selector={{ .Values.configuration.drainPodSelector }} + {{- end }} + {{- if .Values.configuration.drainDelay }} + - --drain-delay={{ .Values.configuration.drainDelay }} + {{- end }} {{- if .Values.configuration.drainTimeout }} - --drain-timeout={{ .Values.configuration.drainTimeout }} {{- end }} @@ -114,9 +130,13 @@ spec: - --post-reboot-node-labels={{ . }} {{- end }} {{- if .Values.configuration.rebootSentinel }} + {{- if .Values.configuration.useRebootSentinelHostPath }} + - --reboot-sentinel=/sentinel/{{ base .Values.configuration.rebootSentinel }} + {{ else }} - --reboot-sentinel={{ .Values.configuration.rebootSentinel }} + {{- end }} {{- end }} - {{- if .Values.configuration.rebootSentinelCommand }} + {{- if and .Values.configuration.rebootSentinelCommand (not .Values.configuration.useRebootSentinelHostPath) }} - --reboot-sentinel-command={{ .Values.configuration.rebootSentinelCommand }} {{- end }} {{- if .Values.configuration.rebootCommand }} @@ -125,6 +145,12 @@ spec: {{- if .Values.configuration.rebootDelay }} - --reboot-delay={{ .Values.configuration.rebootDelay }} {{- end }} + {{- if .Values.configuration.rebootMethod }} + - --reboot-method={{ .Values.configuration.rebootMethod }} + {{- end }} + {{- if .Values.configuration.rebootSignal }} + - --reboot-signal={{ .Values.configuration.rebootSignal }} + {{- end }} {{- if .Values.configuration.slackChannel }} - --slack-channel={{ tpl .Values.configuration.slackChannel . }} {{- end }} @@ -161,6 +187,9 @@ spec: {{- if .Values.configuration.logFormat }} - --log-format={{ .Values.configuration.logFormat }} {{- end }} + {{- if .Values.configuration.concurrency }} + - --concurrency={{ .Values.configuration.concurrency }} + {{- end }} {{- range $key, $value := .Values.extraArgs }} {{- if $value }} - --{{ $key }}={{ $value }} @@ -168,12 +197,19 @@ spec: - --{{ $key }} {{- end }} {{- end }} -{{- if .Values.volumeMounts }} + {{- if or .Values.volumeMounts .Values.configuration.useRebootSentinelHostPath }} volumeMounts: -{{- toYaml .Values.volumeMounts | nindent 12 }} -{{- end }} + {{- end }} + {{- if .Values.configuration.useRebootSentinelHostPath }} + - mountPath: /sentinel + name: sentinel + readOnly: true + {{- end }} + {{- if .Values.volumeMounts }} +{{- toYaml .Values.volumeMounts | nindent 10 }} + {{- end }} ports: - - containerPort: 8080 + - containerPort: {{ .Values.configuration.metricsPort }} name: metrics env: # Pass in the name of the node on which this pod is scheduled @@ -185,6 +221,12 @@ spec: {{- if .Values.extraEnvVars }} {{ toYaml .Values.extraEnvVars | nindent 12 }} {{- end }} + {{- with .Values.livenessProbe }} + livenessProbe: {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.readinessProbe }} + readinessProbe: {{- toYaml . | nindent 12 }} + {{- end }} tolerations: {{- if .Values.tolerations }} {{- with .Values.tolerations }} @@ -204,7 +246,15 @@ spec: affinity: {{ toYaml . | indent 8 }} {{- end }} -{{- if .Values.volumes }} + {{- if or .Values.volumes .Values.configuration.useRebootSentinelHostPath }} volumes: + {{- end }} + {{- if .Values.configuration.useRebootSentinelHostPath }} + - name: sentinel + hostPath: + path: {{ dir .Values.configuration.rebootSentinel }} + type: Directory + {{- end }} + {{- if .Values.volumes }} {{- toYaml .Values.volumes | nindent 8 }} -{{- end }} + {{- end }} diff --git a/helmfile.d/upstream/kubereboot/kured/templates/service.yaml b/helmfile.d/upstream/kubereboot/kured/templates/service.yaml index 0a6e437658..764d787a16 100644 --- a/helmfile.d/upstream/kubereboot/kured/templates/service.yaml +++ b/helmfile.d/upstream/kubereboot/kured/templates/service.yaml @@ -20,7 +20,7 @@ spec: ports: - name: metrics port: {{ .Values.service.port }} - targetPort: 8080 + targetPort: {{ .Values.configuration.metricsPort }} {{- if eq .Values.service.type "NodePort" }} nodePort: {{ .Values.service.nodePort }} {{- end }} diff --git a/helmfile.d/upstream/kubereboot/kured/templates/servicemonitor.yaml b/helmfile.d/upstream/kubereboot/kured/templates/servicemonitor.yaml index 7e36a0e404..6853bec396 100644 --- a/helmfile.d/upstream/kubereboot/kured/templates/servicemonitor.yaml +++ b/helmfile.d/upstream/kubereboot/kured/templates/servicemonitor.yaml @@ -18,7 +18,7 @@ spec: scrapeTimeout: {{ .Values.metrics.scrapeTimeout }} {{- end }} honorLabels: true - targetPort: 8080 + port: metrics path: /metrics scheme: http jobLabel: "{{ .Release.Name }}" diff --git a/helmfile.d/upstream/kubereboot/kured/values.minikube.yaml b/helmfile.d/upstream/kubereboot/kured/values.minikube.yaml index 15168f930d..5df8253779 100644 --- a/helmfile.d/upstream/kubereboot/kured/values.minikube.yaml +++ b/helmfile.d/upstream/kubereboot/kured/values.minikube.yaml @@ -12,6 +12,7 @@ configuration: period: "1m" # reboot check period (default 1h0m0s) # forceReboot: false # force a reboot even if the drain fails or times out (default: false) # drainGracePeriod: "" # time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used (default: -1) + # drainPodSelector: "" # only drain pods with labels matching the selector (default: '', all pods) # drainTimeout: "" # timeout after which the drain is aborted (default: 0, infinite time) # skipWaitForDeleteTimeout: "" # when time is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node (default: 0) # prometheusUrl: "" # Prometheus instance to probe for active alerts diff --git a/helmfile.d/upstream/kubereboot/kured/values.yaml b/helmfile.d/upstream/kubereboot/kured/values.yaml index 132f413c2e..4326dfc2d7 100644 --- a/helmfile.d/upstream/kubereboot/kured/values.yaml +++ b/helmfile.d/upstream/kubereboot/kured/values.yaml @@ -4,11 +4,15 @@ image: pullPolicy: IfNotPresent pullSecrets: [] +revisionHistoryLimit: 10 + updateStrategy: RollingUpdate # requires RollingUpdate updateStrategy maxUnavailable: 1 podAnnotations: {} +# container.apparmor.security.beta.kubernetes.io/kured: unconfined + dsAnnotations: {} extraArgs: {} @@ -23,23 +27,31 @@ extraEnvVars: # value: 123 configuration: + useRebootSentinelHostPath: true # not actually a flag, indicates if the sentinel should be mounted + metricsHost: "" # host where metrics will listen + metricsPort: 8080 # port number where metrics will listen lockTtl: 0 # force clean annotation after this amount of time (default 0, disabled) alertFilterRegexp: "" # alert names to ignore when checking for active alerts alertFiringOnly: false # only consider firing alerts when checking for active alerts + alertFilterMatchOnly: false # Only block if the alert-filter-regexp matches active alerts blockingPodSelector: [] # label selector identifying pods whose presence should prevent reboots endTime: "" # only reboot before this time of day (default "23:59") lockAnnotation: "" # annotation in which to record locking node (default "weave.works/kured-node-lock") period: "" # reboot check period (default 1h0m0s) forceReboot: false # force a reboot even if the drain fails or times out (default: false) drainGracePeriod: "" # time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used (default: -1) + drainPodSelector: "" # only drain pods with labels matching the selector (default: '', all pods) + drainDelay: 0 # delay drain for this duration (default: 0, disabled) drainTimeout: "" # timeout after which the drain is aborted (default: 0, infinite time) skipWaitForDeleteTimeout: "" # when time is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node (default: 0) prometheusUrl: "" # Prometheus instance to probe for active alerts rebootDays: [] # only reboot on these days (default [su,mo,tu,we,th,fr,sa]) - rebootSentinel: "" # path to file whose existence signals need to reboot (default "/var/run/reboot-required") + rebootSentinel: "/var/run/reboot-required" # path to file whose existence signals need to reboot (default "/var/run/reboot-required") rebootSentinelCommand: "" # command for which a successful run signals need to reboot (default ""). If non-empty, sentinel file will be ignored. rebootCommand: "/bin/systemctl reboot" # command to run when a reboot is required by the sentinel rebootDelay: "" # add a delay after drain finishes but before the reboot command is issued + rebootMethod: "" # method to use for reboots (default command), available: command, signal + rebootSignal: "" # signal to use for reboots (default 39 = SIGRTMIN+5). slackChannel: "" # slack channel for reboot notifications slackHookUrl: "" # slack hook URL for reboot notifications slackUsername: "" # slack username for reboot notifications (default "kured") @@ -55,6 +67,7 @@ configuration: logFormat: "text" # log format specified as text or json, defaults to text preRebootNodeLabels: [] # labels to add to nodes before cordoning (default []) postRebootNodeLabels: [] # labels to add to nodes after uncordoning (default []) + concurrency: 1 # amount of nodes to concurrently reboot. (default 1) rbac: @@ -69,11 +82,23 @@ podSecurityPolicy: containerSecurityContext: privileged: true # Give permission to nsenter /proc/1/ns/mnt + readOnlyRootFilesystem: true # allowPrivilegeEscalation: true # Needed when using defaultAllowPrivilegedEscalation: false in psp +# Use the following security-context when "configuration.rebootMethod=signal and configuration.useRebootSentinelHostPath=true" +# containerSecurityContext: +# privileged: false +# readOnlyRootFilesystem: true +# allowPrivilegeEscalation: false +# capabilities: +# add: +# - CAP_KILL +# drop: +# - '*' + resources: {} -hostNetwork: true +hostNetwork: false metrics: create: false @@ -89,6 +114,28 @@ service: name: "" type: ClusterIP +livenessProbe: + httpGet: + path: /metrics + port: metrics + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 5 + +readinessProbe: + httpGet: + path: /metrics + port: metrics + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 5 + podLabels: {} priorityClassName: "" diff --git a/helmfile.d/values/podsecuritypolicies/common/kured.yaml.gotmpl b/helmfile.d/values/podsecuritypolicies/common/kured.yaml.gotmpl index c49764338e..c7ce6e58e1 100644 --- a/helmfile.d/values/podsecuritypolicies/common/kured.yaml.gotmpl +++ b/helmfile.d/values/podsecuritypolicies/common/kured.yaml.gotmpl @@ -9,3 +9,10 @@ constraints: privileged: true runAsUser: rule: RunAsAny + volumes: + - emptyDir + - projected + - hostPath + - configMap + allowedHostPaths: + - pathPrefix: "/var/run"