diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index 170fa5718..b1c5b669d 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -21,6 +21,7 @@ runs: echo "cluster-api-provider-openstack=$(jq -r '.["cluster-api-provider-openstack"]' "$DEPENDENCIES_PATH")" >> $GITHUB_OUTPUT echo "cert-manager=$(jq -r '.["cert-manager"]' "$DEPENDENCIES_PATH")" >> $GITHUB_OUTPUT echo "helm=$(jq -r '.["helm"]' "$DEPENDENCIES_PATH")" >> $GITHUB_OUTPUT + echo "openstack-resource-controller=$(jq -r '.["openstack-resource-controller"]' "$DEPENDENCIES_PATH")" >> $GITHUB_OUTPUT echo "sonobuoy=$(jq -r '.["sonobuoy"]' "$DEPENDENCIES_PATH")" >> $GITHUB_OUTPUT env: DEPENDENCIES_PATH: ${{ inputs.dependencies-path }} @@ -58,7 +59,7 @@ runs: --namespace cert-manager \ --create-namespace \ --install \ - --set installCRDs=true \ + --set crds.enabled=true \ --wait \ --timeout 10m @@ -93,6 +94,16 @@ runs: # unreleased tags kubernetes-sigs/cluster-api#7889 GOPROXY: off + - name: Install openstack-resource-controller + shell: bash + run: | + ORC_URL=https://github.com/k-orc/openstack-resource-controller/releases/download/${ORC_VERSION}/install.yaml + if ! kubectl apply --server-side -f ${ORC_URL}; then + kubectl apply --server-side --force-conflicts -f ${ORC_URL} + fi + env: + ORC_VERSION: ${{ steps.deps.outputs.openstack-resource-controller }} + - name: Install Cluster API add-on provider shell: bash run: |- diff --git a/.github/actions/upgrade-and-test/action.yml b/.github/actions/upgrade-and-test/action.yml index 19e39a299..5373a494c 100644 --- a/.github/actions/upgrade-and-test/action.yml +++ b/.github/actions/upgrade-and-test/action.yml @@ -45,6 +45,12 @@ inputs: One of certified-conformance, conformance-lite, non-disruptive-conformance, quick. required: true default: quick + sonobuoy-level: + description: | + The log level for the Sonobuoy run. + One of panic, fatal, error, warn, info, debug, trace. 
+ required: true + default: info sonobuoy-upload: description: Specify "yes" to upload the Sonobuoy run as an artifact required: true @@ -71,6 +77,22 @@ runs: --set kubernetesVersion=${{ inputs.kubernetes-version }} \ --set machineImageId=${{ inputs.image-id }} + - name: Check for cluster deployment ready condition name + id: check-ready-condition + shell: bash + run: |- + cluster_api_version=$(kubectl api-resources \ + --api-group=cluster.x-k8s.io \ + --no-headers=true |\ + grep ^clusters |\ + awk '{print $3}') + + if [[ "$cluster_api_version" =~ v1beta1$ ]]; then + echo "cluster-ready-condition-name=ready" >> $GITHUB_OUTPUT + else + echo "cluster-ready-condition-name=available" >> $GITHUB_OUTPUT + fi + # Wait for any upgrade to start before checking if it is complete # This is to make sure the controller has actioned the update before # progressing to wait for ready @@ -82,7 +104,7 @@ runs: shell: bash run: |- kubectl wait clusters/${{ inputs.name }} \ - --for=condition=ready=false \ + --for=condition=${{ steps.check-ready-condition.outputs.cluster-ready-condition-name }}=false \ --timeout 2m continue-on-error: true @@ -90,7 +112,7 @@ runs: shell: bash run: |- kubectl wait clusters/${{ inputs.name }} \ - --for=condition=ready \ + --for=condition=${{ steps.check-ready-condition.outputs.cluster-ready-condition-name }} \ --timeout 30m - name: Wait for machine deployments to be running @@ -101,6 +123,30 @@ runs: --for=jsonpath='{.status.phase}'=Running \ --timeout 30m + - name: Wait for control-plane and worker machines to be up-to-date and ready (v1beta2) + shell: bash + run: |- + kubectl wait clusters/${{ inputs.name }} \ + --for=condition=RollingOut=false \ + --timeout 30m \ + && \ + kubectl wait clusters/${{ inputs.name }} \ + --for=condition=ControlPlaneMachinesUpToDate \ + --timeout 30m \ + && \ + kubectl wait clusters/${{ inputs.name }} \ + --for=condition=ControlPlaneMachinesReady \ + --timeout 30m \ + && \ + kubectl wait clusters/${{ inputs.name }} \ + --for=condition=WorkerMachinesUpToDate \ + --timeout 30m \ + && \ + kubectl wait clusters/${{ inputs.name }} \ + --for=condition=WorkerMachinesReady \ + --timeout 30m + if: ${{ steps.check-ready-condition.outputs.cluster-ready-condition-name == 'available' }} + - name: Wait for addons to deploy shell: bash run: |- @@ -138,9 +184,15 @@ runs: KUBECONFIG: ./kubeconfig if: "${{ inputs.skip-workload-status != 'yes' }}" + - name: Get cluster nodes + shell: bash + run: kubectl get nodes -o wide + env: + KUBECONFIG: ./kubeconfig + - name: Run sonobuoy shell: bash - run: sonobuoy run --mode ${{ inputs.sonobuoy-mode }} --wait + run: sonobuoy run --level ${{ inputs.sonobuoy-level }} --mode ${{ inputs.sonobuoy-mode }} --wait env: KUBECONFIG: ./kubeconfig diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index f11bd619b..fa47f77e7 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -32,7 +32,7 @@ jobs: --all \ --validate-maintainers=false - - name: Run template validation + - name: Run template validation (current values) run: |- helm template foo charts/openstack-cluster \ -f charts/openstack-cluster/tests/values_base.yaml \ @@ -45,5 +45,32 @@ jobs: # NOTE: Run the following command locally to generate updated snapshots: # docker run -i --rm -v $(pwd):/apps helmunittest/helm-unittest charts/openstack-cluster -u - - name: Run manifest snapshot test + - name: Run manifest snapshot test (current values) + run: docker run -i --rm -v $(pwd):/apps helmunittest/helm-unittest charts/openstack-cluster + - 
name: Get latest tag + id: latest-tag + run: | + set -eo pipefail + TAG_NAME="$(curl -fsSL "$GITHUB_API_URL/repos/$GITHUB_REPOSITORY/releases/latest" | jq -r '.tag_name')" + echo "tag-name=${TAG_NAME}" >> "$GITHUB_OUTPUT" + - name: Checkout default values from latest tag + run: | + git checkout $LATEST_TAG -- charts/openstack-cluster/values.yaml + env: + LATEST_TAG: ${{ steps.latest-tag.outputs.tag-name }} + + - name: Run template validation (values from latest tag) + run: |- + helm template foo charts/openstack-cluster \ + -f charts/openstack-cluster/tests/values_base.yaml \ + -f charts/openstack-cluster/tests/values_full.yaml \ + | docker run -i --rm ghcr.io/yannh/kubeconform:latest \ + --strict --summary \ + --schema-location default \ + --schema-location 'https://raw.githubusercontent.com/datreeio/CRDs-catalog/main/{{.Group}}/{{.ResourceKind}}_{{.ResourceAPIVersion}}.json' \ + --skip HelmRelease,Manifests,OpenStackCluster,OpenStackMachineTemplate + + # NOTE: Run the following command locally to generate updated snapshots: + # docker run -i --rm -v $(pwd):/apps helmunittest/helm-unittest charts/openstack-cluster -u + - name: Run manifest snapshot test (values from latest tag) run: docker run -i --rm -v $(pwd):/apps helmunittest/helm-unittest charts/openstack-cluster diff --git a/.github/workflows/update-dependencies.yml b/.github/workflows/update-dependencies.yml index fa59db4fe..29402a8fc 100644 --- a/.github/workflows/update-dependencies.yml +++ b/.github/workflows/update-dependencies.yml @@ -42,6 +42,10 @@ jobs: - key: helm type: github repo: helm/helm + + - key: openstack-resource-controller + type: github + repo: k-orc/openstack-resource-controller - key: sonobuoy type: github diff --git a/charts/openstack-cluster/README.md b/charts/openstack-cluster/README.md index b26982a94..168a553d7 100644 --- a/charts/openstack-cluster/README.md +++ b/charts/openstack-cluster/README.md @@ -48,8 +48,8 @@ templates for more details. First, you must set up a [Cluster API management cluster](https://cluster-api.sigs.k8s.io/user/concepts.html#management-cluster) -with the [OpenStack Infrastructure Provider](https://github.com/kubernetes-sigs/cluster-api-provider-openstack) -installed. +with both the [OpenStack Infrastructure Provider](https://github.com/kubernetes-sigs/cluster-api-provider-openstack) +and [OpenStack Resource Controller](https://github.com/k-orc/openstack-resource-controller) installed. Addons are managed by the [Cluster API Addon Provider](https://github.com/azimuth-cloud/cluster-api-addon-provider), diff --git a/charts/openstack-cluster/templates/_helpers.tpl b/charts/openstack-cluster/templates/_helpers.tpl index b39f6d1a1..822ae586b 100644 --- a/charts/openstack-cluster/templates/_helpers.tpl +++ b/charts/openstack-cluster/templates/_helpers.tpl @@ -131,7 +131,8 @@ Outputs the node registration object for setting node labels. {{- define "openstack-cluster.nodeRegistration.nodeLabels" -}} nodeRegistration: kubeletExtraArgs: - node-labels: "{{ range $i, $k := (keys . | sortAlpha) }}{{ if ne $i 0 }},{{ end }}{{ $k }}={{ index $ $k }}{{ end }}" + - name: "node-labels" + value: "{{ range $i, $k := (keys . | sortAlpha) }}{{ if ne $i 0 }},{{ end }}{{ $k }}={{ index $ $k }}{{ end }}" {{- end }} {{/* @@ -272,7 +273,7 @@ files: {{- range $registry, $registrySpec := . 
}}
 - path: /etc/containerd/certs.d/{{ $registry }}/hosts.toml
   content: |
-    {{- include "openstack-cluster.registryFile" (list $registry $registrySpec) | nindent 6 }}
+    {{- include "openstack-cluster.registryFile" (list $registry $registrySpec) | indent 6 }}
   owner: root:root
   permissions: "0644"
 {{- end }}
@@ -330,6 +331,44 @@ preKubeadmCommands:
 {{- end }}
 {{- end }}
+
+{{/*
+Renders a single extraArgs map as a v1beta2-style list of name/value items
+*/}}
+{{- define "openstack-cluster.kubeadmConfigSpec.convert.extraArgs.helper" -}}
+{{- $name := index . 0 }}
+{{- $extraArgs := index . 1 }}
+{{ $name }}: {{ include "openstack-cluster.dict2items" $extraArgs | nindent 2 }}
+{{- end -}}
+
+
+{{/*
+Converts extraArgs and kubeletExtraArgs in a kubeadmConfigSpec to the v1beta2 list style
+*/}}
+{{- define "openstack-cluster.kubeadmConfigSpec.convert.extraArgs" -}}
+{{- if hasKey . "initConfiguration" -}}
+{{- with .initConfiguration.nodeRegistration -}}
+{{- $kubeletExtraArgs := deepCopy .kubeletExtraArgs -}}
+{{- $_ := mustMergeOverwrite . (include "openstack-cluster.kubeadmConfigSpec.convert.extraArgs.helper" (list "kubeletExtraArgs" $kubeletExtraArgs) | fromYaml ) -}}
+{{- end -}}
+{{- end -}}
+{{- if hasKey . "joinConfiguration" -}}
+{{- with .joinConfiguration.nodeRegistration -}}
+{{- $kubeletExtraArgs := deepCopy .kubeletExtraArgs -}}
+{{- $_ := mustMergeOverwrite . (include "openstack-cluster.kubeadmConfigSpec.convert.extraArgs.helper" (list "kubeletExtraArgs" $kubeletExtraArgs) | fromYaml ) -}}
+{{- end -}}
+{{- end -}}
+{{- if hasKey . "clusterConfiguration" -}}
+{{- with .clusterConfiguration.controllerManager -}}
+{{- $extraArgs := deepCopy .extraArgs -}}
+{{- $_ := mustMergeOverwrite . (include "openstack-cluster.kubeadmConfigSpec.convert.extraArgs.helper" (list "extraArgs" $extraArgs) | fromYaml ) -}}
+{{- end -}}
+{{- with .clusterConfiguration.scheduler -}}
+{{- $extraArgs := deepCopy .extraArgs -}}
+{{- $_ := mustMergeOverwrite . (include "openstack-cluster.kubeadmConfigSpec.convert.extraArgs.helper" (list "extraArgs" $extraArgs) | fromYaml ) -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
 {{/*
 Produces the spec for a KubeadmConfig object.
 */}}
@@ -414,7 +453,8 @@ webhooks and policies for audit logging can be added here.
 clusterConfiguration:
   apiServer:
     extraArgs:
-      v: {{ $ctx.Values.apiServer.logLevel | quote }}
+      - name: "v"
+        value: {{ $ctx.Values.apiServer.logLevel | quote }}
 {{- if ne $authWebhook "none" }}
 {{- if eq $authWebhook "azimuth-authorization-webhook" }}
       authorization-config: /etc/kubernetes/webhooks/authorization_config.yaml
@@ -576,3 +616,164 @@ Produces integration for azimuth_authorization_webhook on apiserver
     name: {{ .name }}
 {{- end }}
 {{- end }}
+
+{{/*
+Converts a dict to a list of items
+*/}}
+{{- define "openstack-cluster.dict2items" -}}
+{{- if kindIs "map" . -}}
+{{- $items := list -}}
+{{- range $key, $value := . -}}
+{{- $item := dict "name" $key "value" $value -}}
+{{- $items = append $items $item -}}
+{{- end -}}
+{{ toYaml $items }}
+{{- else -}}
+{{ toYaml . }}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Converts rolloutStrategy to rollout.strategy for v1beta2
+*/}}
+{{- define "openstack-cluster.convert.rolloutStrategy" -}}
+{{- if hasKey . 
"rolloutStrategy" -}} +{{- $rollout := dict -}} +{{- $rolloutStrategy := deepCopy .rolloutStrategy -}} +{{- range $k, $v := $rolloutStrategy -}} +{{- if kindIs "map" $v -}} +{{- $k := unset $v "deletePolicy" -}} +{{- end -}} +{{- end -}} +{{- $_ := set $rollout "strategy" $rolloutStrategy }} +{{- $rollout | toYaml -}} +{{- else -}} +{{- .rollout | toYaml -}} +{{- end -}} +{{- end -}} + +{{/* +Converts rolloutStrategy..deletePolicy to deletion.order for v1beta2 +*/}} +{{- define "openstack-cluster.convert.deletePolicy" -}} +{{- $nodeGroup := . -}} +{{- if hasKey . "rolloutStrategy" -}} +{{- range $k, $v := .rolloutStrategy -}} +{{- if kindIs "map" $v -}} +{{- if hasKey $v "deletePolicy" -}} +{{- get $v "deletePolicy" -}} +{{- else -}} +{{ $nodeGroup.deletion.order }} +{{- end -}} +{{- end -}} +{{- end -}} +{{- else -}} +{{- $nodeGroup.deletion.order -}} +{{- end -}} +{{- end -}} + +{{/* +Converts remediationStrategy to remediation for v1beta2 +*/}} +{{- define "openstack-cluster.convert.remediationStrategy" -}} +{{- if hasKey . "remediationStrategy" -}} +{{- $remediation := dict }} +{{- range $k, $v := .remediationStrategy -}} +{{- if eq $k "maxRetry" }} +{{- $remediation := set $remediation $k $v -}} +{{- else -}} +{{- $remediation := set $remediation (printf "%sSeconds" $k) (include "openstack-cluster.convert.humanTimeToSeconds" $v | int ) -}} +{{- end -}} +{{- end -}} +{{- $remediation | toYaml -}} +{{- else -}} +{{- .remediation | toYaml -}} +{{- end -}} +{{- end -}} + +{{/* +Convert nodeDrainTimeout to nodeDrainTimeoutSeconds +*/}} +{{- define "openstack-cluster.convert.nodeDrainTimeout" -}} +{{- if hasKey . "nodeDrainTimeout" -}} +{{- include "openstack-cluster.convert.humanTimeToSeconds" .nodeDrainTimeout -}} +{{- else -}} +{{- .nodeDrainTimeoutSeconds -}} +{{- end -}} +{{- end -}} + +{{/* +Convert nodeVolumeDetachTimeout to volumeDetachTimeoutSeconds +*/}} +{{- define "openstack-cluster.convert.volumeDetachTimeout" -}} +{{- if hasKey . "nodeVolumeDetachTimeout" -}} +{{- include "openstack-cluster.convert.humanTimeToSeconds" .nodeVolumeDetachTimeout -}} +{{- else -}} +{{- .nodeVolumeDetachTimeoutSeconds -}} +{{- end -}} +{{- end -}} + +{{/* +Convert nodeDeletionTimeout to nodeDeletionTimeoutSeconds +*/}} +{{- define "openstack-cluster.convert.nodeDeletionTimeout" -}} +{{- if hasKey . "nodeDeletionTimeout" -}} +{{- include "openstack-cluster.convert.humanTimeToSeconds" .nodeDeletionTimeout -}} +{{- else -}} +{{- .nodeDeletionTimeoutSeconds -}} +{{- end -}} +{{- end -}} + +{{/* +Convert healthCheck to v1beta2 +*/}} +{{- define "openstack-cluster.convert.healthCheck" -}} +{{- if hasKey . "checks" -}} +{{- . | toYaml -}} +{{- else -}} +{{- $healthcheck := dict -}} +{{- $healthcheckchecks := dict -}} +{{- $healthcheckremediation := dict -}} +{{- if hasKey . "nodeStartupTimeout" -}} +{{- $_ := set $healthcheckchecks "nodeStartupTimeoutSeconds" (include "openstack-cluster.convert.humanTimeToSeconds" .nodeStartupTimeout | int) -}} +{{- end -}} +{{- if hasKey . "unhealthyConditions" -}} +{{- $cond := list -}} +{{- range $conditions := .unhealthyConditions -}} +{{- $condition := dict "type" $conditions.type "status" $conditions.status "timeoutSeconds" (include "openstack-cluster.convert.humanTimeToSeconds" $conditions.timeout | int) -}} +{{- $cond = append $cond $condition -}} +{{- $_ := set $healthcheckchecks "unhealthyNodeConditions" $cond -}} +{{- end -}} +{{- if hasKey . 
"maxUnhealthy" -}} +{{- $_ := set $healthcheckremediation "triggerIf" (dict "unhealthyLessThanOrEqualTo" .maxUnhealthy) -}} +{{- end -}} +{{- $_ := set $healthcheck "checks" $healthcheckchecks -}} +{{- $_ := set $healthcheck "remediation" $healthcheckremediation -}} +{{- end -}} +{{- $healthcheck | toYaml -}} +{{- end -}} +{{- end -}} + +{{/* +Converts human time to seconds +*/}} +{{- define "openstack-cluster.convert.humanTimeToSeconds" -}} +{{- if kindIs "int" . -}} +{{ . }} +{{- else -}} +{{- $seconds := 0 -}} +{{- if regexMatch "h" . -}} +{{- $hours := regexFind "[0-9]+h" . | trimSuffix "h" | int }} +{{- $seconds = add $seconds (mul 60 60 $hours) -}} +{{- end -}} +{{- if regexMatch "m" . -}} +{{- $mins := regexFind "[0-9]+m" . | trimSuffix "m" | int }} +{{- $seconds = add $seconds (mul 60 $mins) -}} +{{- end -}} +{{- if regexMatch "s" . -}} +{{- $secs := regexFind "[0-9]+s" . | trimSuffix "s" | int }} +{{- $seconds = add $seconds $secs -}} +{{- end -}} +{{ $seconds }} +{{- end -}} +{{- end -}} diff --git a/charts/openstack-cluster/templates/cluster-openstack.yaml b/charts/openstack-cluster/templates/cluster-openstack.yaml index 92ecdf3a8..db99281d1 100644 --- a/charts/openstack-cluster/templates/cluster-openstack.yaml +++ b/charts/openstack-cluster/templates/cluster-openstack.yaml @@ -37,6 +37,9 @@ spec: - {{ include "openstack-cluster.convert.neutronFilter" . | nindent 6 }} {{- end }} {{- else }} + {{- with .networkMTU }} + networkMTU: {{ . }} + {{- end }} managedSubnets: - cidr: {{ .nodeCidr }} {{- with (default $.Values.clusterNetworking.dnsNameservers .dnsNameservers) }} diff --git a/charts/openstack-cluster/templates/cluster.yaml b/charts/openstack-cluster/templates/cluster.yaml index 4272b9555..67373236d 100644 --- a/charts/openstack-cluster/templates/cluster.yaml +++ b/charts/openstack-cluster/templates/cluster.yaml @@ -1,5 +1,5 @@ --- -apiVersion: cluster.x-k8s.io/v1beta1 +apiVersion: cluster.x-k8s.io/v1beta2 kind: Cluster metadata: name: {{ include "openstack-cluster.clusterName" . }} @@ -8,12 +8,10 @@ metadata: spec: clusterNetwork: {{ .Values.kubeNetwork | toYaml | nindent 4 }} controlPlaneRef: - apiVersion: controlplane.cluster.x-k8s.io/v1beta1 + apiGroup: controlplane.cluster.x-k8s.io kind: KubeadmControlPlane name: {{ include "openstack-cluster.componentName" (list . "control-plane") }} - namespace: {{ .Release.Namespace }} infrastructureRef: - apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + apiGroup: infrastructure.cluster.x-k8s.io kind: OpenStackCluster name: {{ include "openstack-cluster.clusterName" . 
}}
-    namespace: {{ .Release.Namespace }}
diff --git a/charts/openstack-cluster/templates/control-plane/kubeadm-control-plane.yaml b/charts/openstack-cluster/templates/control-plane/kubeadm-control-plane.yaml
index b237f1b72..8e3f4b732 100644
--- a/charts/openstack-cluster/templates/control-plane/kubeadm-control-plane.yaml
+++ b/charts/openstack-cluster/templates/control-plane/kubeadm-control-plane.yaml
@@ -31,13 +31,20 @@ preKubeadmCommands:
 clusterConfiguration:
   apiServer:
     extraArgs:
-      oidc-issuer-url: {{ quote .issuerUrl }}
-      oidc-client-id: {{ required "clientId is required for OIDC" .clientId | quote }}
-      oidc-username-claim: {{ quote .usernameClaim }}
-      oidc-username-prefix: {{ quote .usernamePrefix }}
-      oidc-groups-claim: {{ quote .groupsClaim }}
-      oidc-groups-prefix: {{ quote .groupsPrefix }}
-      oidc-signing-algs: {{ quote .signingAlgs }}
+      - name: "oidc-issuer-url"
+        value: {{ quote .issuerUrl }}
+      - name: "oidc-client-id"
+        value: {{ required "clientId is required for OIDC" .clientId | quote }}
+      - name: "oidc-username-claim"
+        value: {{ quote .usernameClaim }}
+      - name: "oidc-username-prefix"
+        value: {{ quote .usernamePrefix }}
+      - name: "oidc-groups-claim"
+        value: {{ quote .groupsClaim }}
+      - name: "oidc-groups-prefix"
+        value: {{ quote .groupsPrefix }}
+      - name: "oidc-signing-algs"
+        value: {{ quote .signingAlgs }}
 {{- end }}
 {{- end }}
 {{- end }}
@@ -47,7 +54,7 @@ clusterConfiguration:
   etcd:
     local:
       dataDir: {{ .Values.etcd.dataDir }}
-      extraArgs: {{ toYaml .Values.etcd.extraArgs | nindent 8 }}
+      extraArgs: {{ include "openstack-cluster.dict2items" .Values.etcd.extraArgs | nindent 8 }}
 {{- if .Values.etcd.blockDevice }}
 # Tell kubeadm to ignore the fact that the etcd datadir contains lost+found
 initConfiguration:
@@ -83,7 +90,8 @@ joinConfiguration:
 clusterConfiguration:
   apiServer:
     extraArgs:
-      encryption-provider-config: /etc/kubernetes/enc/enc.yaml
+      - name: encryption-provider-config
+        value: /etc/kubernetes/enc/enc.yaml
 files:
   - path: /etc/kubernetes/patches/kube-apiserver1+strategic.yaml
     permissions: "0644"
@@ -150,7 +158,8 @@ files:
 clusterConfiguration:
   apiServer:
     extraArgs:
-      admission-control-config-file: /etc/kubernetes/admission/configuration.yaml
+      - name: "admission-control-config-file"
+        value: "/etc/kubernetes/admission/configuration.yaml"
     extraVolumes:
       - name: admission-configuration
         hostPath: /etc/kubernetes/admission
@@ -160,7 +169,8 @@ clusterConfiguration:
 {{- end }}
 
 ---
-apiVersion: controlplane.cluster.x-k8s.io/v1beta1
+{{- include "openstack-cluster.kubeadmConfigSpec.convert.extraArgs" .Values.controlPlane.kubeadmConfigSpec }}
+apiVersion: controlplane.cluster.x-k8s.io/v1beta2
 kind: KubeadmControlPlane
 metadata:
   name: {{ include "openstack-cluster.componentName" (list . "control-plane") }}
@@ -177,19 +187,20 @@ spec:
       trimPrefix "v"
   }}
   replicas: {{ .Values.controlPlane.machineCount }}
-  remediationStrategy: {{ toYaml .Values.controlPlane.remediationStrategy | nindent 4 }}
-  rolloutStrategy: {{ toYaml .Values.controlPlane.rolloutStrategy | nindent 4 }}
+  remediation: {{ include "openstack-cluster.convert.remediationStrategy" .Values.controlPlane | nindent 4 }}
+  rollout: {{ include "openstack-cluster.convert.rolloutStrategy" .Values.controlPlane | nindent 4 }}
   machineTemplate:
     metadata:
      labels: {{ include "openstack-cluster.componentSelectorLabels" (list . 
"control-plane") | nindent 8 }} - infrastructureRef: - kind: OpenStackMachineTemplate - apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 - name: {{ include "openstack-cluster.controlplane.mt.name" . }} - namespace: {{ .Release.Namespace }} - nodeDrainTimeout: {{ .Values.controlPlane.nodeDrainTimeout }} - nodeVolumeDetachTimeout: {{ .Values.controlPlane.nodeVolumeDetachTimeout }} - nodeDeletionTimeout: {{ .Values.controlPlane.nodeDeletionTimeout }} + spec: + infrastructureRef: + kind: OpenStackMachineTemplate + apiGroup: infrastructure.cluster.x-k8s.io + name: {{ include "openstack-cluster.controlplane.mt.name" . }} + deletion: + nodeDrainTimeoutSeconds: {{ include "openstack-cluster.convert.nodeDrainTimeout" .Values.controlPlane }} + nodeVolumeDetachTimeoutSeconds: {{ include "openstack-cluster.convert.volumeDetachTimeout" .Values.controlPlane }} + nodeDeletionTimeoutSeconds: {{ include "openstack-cluster.convert.nodeDeletionTimeout" .Values.controlPlane }} kubeadmConfigSpec: {{ omit ( diff --git a/charts/openstack-cluster/templates/control-plane/machine-health-check.yaml b/charts/openstack-cluster/templates/control-plane/machine-health-check.yaml index aa1138ad1..2d3de43dc 100644 --- a/charts/openstack-cluster/templates/control-plane/machine-health-check.yaml +++ b/charts/openstack-cluster/templates/control-plane/machine-health-check.yaml @@ -1,6 +1,6 @@ {{- if .Values.controlPlane.healthCheck.enabled }} --- -apiVersion: cluster.x-k8s.io/v1beta1 +apiVersion: cluster.x-k8s.io/v1beta2 kind: MachineHealthCheck metadata: name: {{ include "openstack-cluster.componentName" (list . "control-plane") }} @@ -9,5 +9,5 @@ spec: clusterName: {{ include "openstack-cluster.clusterName" . }} selector: matchLabels: {{ include "openstack-cluster.componentSelectorLabels" (list . "control-plane") | nindent 6 }} - {{- toYaml .Values.controlPlane.healthCheck.spec | nindent 2 }} + {{- include "openstack-cluster.convert.healthCheck" .Values.controlPlane.healthCheck.spec | nindent 2 }} {{- end }} diff --git a/charts/openstack-cluster/templates/node-group/kubeadm-config-template.yaml b/charts/openstack-cluster/templates/node-group/kubeadm-config-template.yaml index 7f93aa63b..79cbc42f6 100644 --- a/charts/openstack-cluster/templates/node-group/kubeadm-config-template.yaml +++ b/charts/openstack-cluster/templates/node-group/kubeadm-config-template.yaml @@ -22,6 +22,7 @@ joinConfiguration: {{ include "openstack-cluster.nodeRegistration.nodeLabels" $n {{- define "openstack-cluster.nodegroup.kct.spec" -}} {{- $ctx := index . 0 }} {{- $nodeGroup := index . 
1 }} +{{- include "openstack-cluster.kubeadmConfigSpec.convert.extraArgs" $nodeGroup.kubeadmConfigSpec -}} {{- list (include "openstack-cluster.nodegroup.kct.spec.nodeLabels" (list $ctx $nodeGroup) | fromYaml) @@ -48,7 +49,7 @@ joinConfiguration: {{ include "openstack-cluster.nodeRegistration.nodeLabels" $n {{- range $nodeGroupOverrides := .Values.nodeGroups }} {{- $nodeGroup := deepCopy $.Values.nodeGroupDefaults | mustMerge $nodeGroupOverrides }} --- -apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 +apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: KubeadmConfigTemplate metadata: name: {{ include "openstack-cluster.nodegroup.kct.name" (list $ $nodeGroup) }} diff --git a/charts/openstack-cluster/templates/node-group/machine-deployment.yaml b/charts/openstack-cluster/templates/node-group/machine-deployment.yaml index eb20b1dc7..7543db7a9 100644 --- a/charts/openstack-cluster/templates/node-group/machine-deployment.yaml +++ b/charts/openstack-cluster/templates/node-group/machine-deployment.yaml @@ -4,7 +4,7 @@ {{- fail (printf "Node group name must be at least three characters long and must contain only lower-case alphanumeric characters and dashes (found name: %s)" $nodeGroup.name) }} {{- end }} --- -apiVersion: cluster.x-k8s.io/v1beta1 +apiVersion: cluster.x-k8s.io/v1beta2 kind: MachineDeployment metadata: name: {{ include "openstack-cluster.componentName" (list $ $nodeGroup.name) }} @@ -35,7 +35,9 @@ spec: {{- if not $nodeGroup.autoscale }} replicas: {{ $nodeGroup.machineCount | required (printf "no machine count specified for node group '%s'" $nodeGroup.name) }} {{- end }} - strategy: {{ toYaml $nodeGroup.rolloutStrategy | nindent 4 }} + rollout: {{ include "openstack-cluster.convert.rolloutStrategy" $nodeGroup | nindent 4 }} + deletion: + order: {{ include "openstack-cluster.convert.deletePolicy" $nodeGroup }} selector: matchLabels: {{- include "openstack-cluster.componentSelectorLabels" (list $ "worker") | nindent 6 }} @@ -59,14 +61,15 @@ spec: {{- end }} bootstrap: configRef: - apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + apiGroup: bootstrap.cluster.x-k8s.io kind: KubeadmConfigTemplate name: {{ include "openstack-cluster.nodegroup.kct.name" (list $ $nodeGroup) }} infrastructureRef: - apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + apiGroup: infrastructure.cluster.x-k8s.io kind: OpenStackMachineTemplate name: {{ include "openstack-cluster.nodegroup.mt.name" (list $ $nodeGroup) }} - nodeDrainTimeout: {{ $nodeGroup.nodeDrainTimeout }} - nodeVolumeDetachTimeout: {{ $nodeGroup.nodeVolumeDetachTimeout }} - nodeDeletionTimeout: {{ $nodeGroup.nodeDeletionTimeout }} + deletion: + nodeDrainTimeoutSeconds: {{ include "openstack-cluster.convert.nodeDrainTimeout" $nodeGroup }} + nodeVolumeDetachTimeoutSeconds: {{ include "openstack-cluster.convert.volumeDetachTimeout" $nodeGroup }} + nodeDeletionTimeoutSeconds: {{ include "openstack-cluster.convert.nodeDeletionTimeout" $nodeGroup }} {{- end }} diff --git a/charts/openstack-cluster/templates/node-group/machine-health-check.yaml b/charts/openstack-cluster/templates/node-group/machine-health-check.yaml index 1e23a64c8..26b245180 100644 --- a/charts/openstack-cluster/templates/node-group/machine-health-check.yaml +++ b/charts/openstack-cluster/templates/node-group/machine-health-check.yaml @@ -2,7 +2,7 @@ {{- $nodeGroup := deepCopy $.Values.nodeGroupDefaults | mustMerge $nodeGroupOverrides }} {{- if $nodeGroup.healthCheck.enabled }} --- -apiVersion: cluster.x-k8s.io/v1beta1 +apiVersion: cluster.x-k8s.io/v1beta2 kind: MachineHealthCheck 
metadata: name: {{ include "openstack-cluster.componentName" (list $ $nodeGroup.name) }} @@ -15,6 +15,6 @@ spec: matchLabels: {{- include "openstack-cluster.componentSelectorLabels" (list $ "worker") | nindent 6 }} {{ $.Values.projectPrefix }}/node-group: {{ $nodeGroup.name }} - {{- toYaml $nodeGroup.healthCheck.spec | nindent 2 }} + {{- include "openstack-cluster.convert.healthCheck" $nodeGroup.healthCheck.spec | nindent 2 }} {{- end }} {{- end }} diff --git a/charts/openstack-cluster/tests/__snapshot__/snapshot_base_test.yaml.snap b/charts/openstack-cluster/tests/__snapshot__/snapshot_base_test.yaml.snap index 008520a01..ea4b3dd6b 100644 --- a/charts/openstack-cluster/tests/__snapshot__/snapshot_base_test.yaml.snap +++ b/charts/openstack-cluster/tests/__snapshot__/snapshot_base_test.yaml.snap @@ -746,7 +746,7 @@ templated manifests should match snapshot: managedSubnets: - cidr: 192.168.3.0/24 28: | - apiVersion: cluster.x-k8s.io/v1beta1 + apiVersion: cluster.x-k8s.io/v1beta2 kind: Cluster metadata: annotations: {} @@ -766,17 +766,15 @@ templated manifests should match snapshot: cidrBlocks: - 172.24.0.0/13 controlPlaneRef: - apiVersion: controlplane.cluster.x-k8s.io/v1beta1 + apiGroup: controlplane.cluster.x-k8s.io kind: KubeadmControlPlane name: RELEASE-NAME-control-plane - namespace: NAMESPACE infrastructureRef: - apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + apiGroup: infrastructure.cluster.x-k8s.io kind: OpenStackCluster name: RELEASE-NAME - namespace: NAMESPACE 29: | - apiVersion: controlplane.cluster.x-k8s.io/v1beta1 + apiVersion: controlplane.cluster.x-k8s.io/v1beta2 kind: KubeadmControlPlane metadata: annotations: @@ -793,8 +791,10 @@ templated manifests should match snapshot: clusterConfiguration: apiServer: extraArgs: - admission-control-config-file: /etc/kubernetes/admission/configuration.yaml - v: "2" + - name: admission-control-config-file + value: /etc/kubernetes/admission/configuration.yaml + - name: v + value: "2" extraVolumes: - hostPath: /etc/kubernetes/admission mountPath: /etc/kubernetes/admission @@ -803,19 +803,26 @@ templated manifests should match snapshot: readOnly: true controllerManager: extraArgs: - bind-address: 0.0.0.0 - cloud-provider: external + - name: bind-address + value: 0.0.0.0 + - name: cloud-provider + value: external etcd: local: dataDir: /var/lib/etcd extraArgs: - election-timeout: "5000" - heartbeat-interval: "500" - listen-metrics-urls: http://0.0.0.0:2381 - quota-backend-bytes: "4294967296" + - name: election-timeout + value: "5000" + - name: heartbeat-interval + value: "500" + - name: listen-metrics-urls + value: http://0.0.0.0:2381 + - name: quota-backend-bytes + value: "4294967296" scheduler: extraArgs: - bind-address: 0.0.0.0 + - name: bind-address + value: 0.0.0.0 files: - contentFrom: secret: @@ -890,12 +897,14 @@ templated manifests should match snapshot: initConfiguration: nodeRegistration: kubeletExtraArgs: - cloud-provider: external + - name: cloud-provider + value: external name: '{{ local_hostname }}' joinConfiguration: nodeRegistration: kubeletExtraArgs: - cloud-provider: external + - name: cloud-provider + value: external name: '{{ local_hostname }}' preKubeadmCommands: - |- @@ -909,30 +918,32 @@ templated manifests should match snapshot: EOF - cat /run/kubeadm/kube-proxy-configuration.yaml >> /run/kubeadm/kubeadm.yaml machineTemplate: - infrastructureRef: - apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 - kind: OpenStackMachineTemplate - name: RELEASE-NAME-control-plane-e39a716c - namespace: NAMESPACE metadata: 
labels: capi.stackhpc.com/cluster: RELEASE-NAME capi.stackhpc.com/component: control-plane - nodeDeletionTimeout: 5m0s - nodeDrainTimeout: 5m0s - nodeVolumeDetachTimeout: 5m0s - remediationStrategy: + spec: + deletion: + nodeDeletionTimeoutSeconds: 300 + nodeDrainTimeoutSeconds: 300 + nodeVolumeDetachTimeoutSeconds: 300 + infrastructureRef: + apiGroup: infrastructure.cluster.x-k8s.io + kind: OpenStackMachineTemplate + name: RELEASE-NAME-control-plane-e39a716c + remediation: maxRetry: 3 - minHealthyPeriod: 1h - retryPeriod: 20m + minHealthyPeriodSeconds: 3600 + retryPeriodSeconds: 1200 replicas: 3 - rolloutStrategy: - rollingUpdate: - maxSurge: 1 - type: RollingUpdate + rollout: + strategy: + rollingUpdate: + maxSurge: 1 + type: RollingUpdate version: v1.29.2 30: | - apiVersion: cluster.x-k8s.io/v1beta1 + apiVersion: cluster.x-k8s.io/v1beta2 kind: MachineHealthCheck metadata: labels: @@ -943,20 +954,23 @@ templated manifests should match snapshot: helm.sh/chart: openstack-cluster-0.1.0 name: RELEASE-NAME-control-plane spec: + checks: + nodeStartupTimeoutSeconds: 1800 + unhealthyNodeConditions: + - status: Unknown + timeoutSeconds: 300 + type: Ready + - status: "False" + timeoutSeconds: 300 + type: Ready clusterName: RELEASE-NAME - maxUnhealthy: 1 - nodeStartupTimeout: 30m0s + remediation: + triggerIf: + unhealthyLessThanOrEqualTo: 1 selector: matchLabels: capi.stackhpc.com/cluster: RELEASE-NAME capi.stackhpc.com/component: control-plane - unhealthyConditions: - - status: Unknown - timeout: 5m0s - type: Ready - - status: "False" - timeout: 5m0s - type: Ready 31: | apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: OpenStackMachineTemplate @@ -998,11 +1012,11 @@ templated manifests should match snapshot: kind: AdmissionConfiguration plugins: [] 33: | - apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: KubeadmConfigTemplate metadata: annotations: - capi.stackhpc.com/template-checksum: d43bd1bb079c9f949f46b9f6bf89e1e1027a45756426801398f5ab1199a331c4 + capi.stackhpc.com/template-checksum: c8e20309a264ffa642f909594d0726d6155cef467f05e575d96e2d4b40f5618b helm.sh/resource-policy: keep labels: capi.stackhpc.com/cluster: RELEASE-NAME @@ -1011,7 +1025,7 @@ templated manifests should match snapshot: capi.stackhpc.com/managed-by: Helm capi.stackhpc.com/node-group: group-1 helm.sh/chart: openstack-cluster-0.1.0 - name: RELEASE-NAME-group-1-d43bd1bb + name: RELEASE-NAME-group-1-c8e20309 spec: template: spec: @@ -1074,8 +1088,10 @@ templated manifests should match snapshot: joinConfiguration: nodeRegistration: kubeletExtraArgs: - cloud-provider: external - node-labels: capi.stackhpc.com/node-group=group-1 + - name: node-labels + value: capi.stackhpc.com/node-group=group-1 + - name: cloud-provider + value: external name: '{{ local_hostname }}' preKubeadmCommands: - |- @@ -1088,7 +1104,7 @@ templated manifests should match snapshot: systemctl restart containerd EOF 34: | - apiVersion: cluster.x-k8s.io/v1beta1 + apiVersion: cluster.x-k8s.io/v1beta2 kind: MachineDeployment metadata: annotations: null @@ -1102,19 +1118,21 @@ templated manifests should match snapshot: name: RELEASE-NAME-group-1 spec: clusterName: RELEASE-NAME + deletion: + order: Random replicas: 1 + rollout: + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + type: RollingUpdate selector: matchLabels: capi.stackhpc.com/cluster: RELEASE-NAME capi.stackhpc.com/component: worker capi.stackhpc.com/node-group: group-1 cluster.x-k8s.io/cluster-name: RELEASE-NAME - strategy: - 
rollingUpdate: - deletePolicy: Random - maxSurge: 0 - maxUnavailable: 1 - type: RollingUpdate template: metadata: labels: @@ -1124,20 +1142,21 @@ templated manifests should match snapshot: spec: bootstrap: configRef: - apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + apiGroup: bootstrap.cluster.x-k8s.io kind: KubeadmConfigTemplate - name: RELEASE-NAME-group-1-d43bd1bb + name: RELEASE-NAME-group-1-c8e20309 clusterName: RELEASE-NAME + deletion: + nodeDeletionTimeoutSeconds: 300 + nodeDrainTimeoutSeconds: 300 + nodeVolumeDetachTimeoutSeconds: 300 infrastructureRef: - apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + apiGroup: infrastructure.cluster.x-k8s.io kind: OpenStackMachineTemplate name: RELEASE-NAME-group-1-974c4cfd - nodeDeletionTimeout: 5m0s - nodeDrainTimeout: 5m0s - nodeVolumeDetachTimeout: 5m0s version: v1.29.2 35: | - apiVersion: cluster.x-k8s.io/v1beta1 + apiVersion: cluster.x-k8s.io/v1beta2 kind: MachineHealthCheck metadata: labels: @@ -1149,21 +1168,24 @@ templated manifests should match snapshot: helm.sh/chart: openstack-cluster-0.1.0 name: RELEASE-NAME-group-1 spec: + checks: + nodeStartupTimeoutSeconds: 1800 + unhealthyNodeConditions: + - status: Unknown + timeoutSeconds: 300 + type: Ready + - status: "False" + timeoutSeconds: 300 + type: Ready clusterName: RELEASE-NAME - maxUnhealthy: 100% - nodeStartupTimeout: 30m0s + remediation: + triggerIf: + unhealthyLessThanOrEqualTo: 100% selector: matchLabels: capi.stackhpc.com/cluster: RELEASE-NAME capi.stackhpc.com/component: worker capi.stackhpc.com/node-group: group-1 - unhealthyConditions: - - status: Unknown - timeout: 5m0s - type: Ready - - status: "False" - timeout: 5m0s - type: Ready 36: | apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: OpenStackMachineTemplate diff --git a/charts/openstack-cluster/tests/__snapshot__/snapshot_full_test.yaml.snap b/charts/openstack-cluster/tests/__snapshot__/snapshot_full_test.yaml.snap index dce24b08c..dbbb8c824 100644 --- a/charts/openstack-cluster/tests/__snapshot__/snapshot_full_test.yaml.snap +++ b/charts/openstack-cluster/tests/__snapshot__/snapshot_full_test.yaml.snap @@ -2020,7 +2020,7 @@ templated manifests should match snapshot: managedSubnets: - cidr: 192.168.3.0/24 66: | - apiVersion: cluster.x-k8s.io/v1beta1 + apiVersion: cluster.x-k8s.io/v1beta2 kind: Cluster metadata: annotations: {} @@ -2040,17 +2040,15 @@ templated manifests should match snapshot: cidrBlocks: - 172.24.0.0/13 controlPlaneRef: - apiVersion: controlplane.cluster.x-k8s.io/v1beta1 + apiGroup: controlplane.cluster.x-k8s.io kind: KubeadmControlPlane name: RELEASE-NAME-control-plane - namespace: NAMESPACE infrastructureRef: - apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + apiGroup: infrastructure.cluster.x-k8s.io kind: OpenStackCluster name: RELEASE-NAME - namespace: NAMESPACE 67: | - apiVersion: controlplane.cluster.x-k8s.io/v1beta1 + apiVersion: controlplane.cluster.x-k8s.io/v1beta2 kind: KubeadmControlPlane metadata: annotations: @@ -2067,8 +2065,10 @@ templated manifests should match snapshot: clusterConfiguration: apiServer: extraArgs: - admission-control-config-file: /etc/kubernetes/admission/configuration.yaml - v: "2" + - name: admission-control-config-file + value: /etc/kubernetes/admission/configuration.yaml + - name: v + value: "2" extraVolumes: - hostPath: /etc/kubernetes/admission mountPath: /etc/kubernetes/admission @@ -2077,19 +2077,26 @@ templated manifests should match snapshot: readOnly: true controllerManager: extraArgs: - bind-address: 0.0.0.0 - cloud-provider: external + - 
name: bind-address + value: 0.0.0.0 + - name: cloud-provider + value: external etcd: local: dataDir: /var/lib/etcd extraArgs: - election-timeout: "5000" - heartbeat-interval: "500" - listen-metrics-urls: http://0.0.0.0:2381 - quota-backend-bytes: "4294967296" + - name: election-timeout + value: "5000" + - name: heartbeat-interval + value: "500" + - name: listen-metrics-urls + value: http://0.0.0.0:2381 + - name: quota-backend-bytes + value: "4294967296" scheduler: extraArgs: - bind-address: 0.0.0.0 + - name: bind-address + value: 0.0.0.0 files: - contentFrom: secret: @@ -2164,12 +2171,14 @@ templated manifests should match snapshot: initConfiguration: nodeRegistration: kubeletExtraArgs: - cloud-provider: external + - name: cloud-provider + value: external name: '{{ local_hostname }}' joinConfiguration: nodeRegistration: kubeletExtraArgs: - cloud-provider: external + - name: cloud-provider + value: external name: '{{ local_hostname }}' preKubeadmCommands: - |- @@ -2183,30 +2192,32 @@ templated manifests should match snapshot: EOF - cat /run/kubeadm/kube-proxy-configuration.yaml >> /run/kubeadm/kubeadm.yaml machineTemplate: - infrastructureRef: - apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 - kind: OpenStackMachineTemplate - name: RELEASE-NAME-control-plane-e39a716c - namespace: NAMESPACE metadata: labels: capi.stackhpc.com/cluster: RELEASE-NAME capi.stackhpc.com/component: control-plane - nodeDeletionTimeout: 5m0s - nodeDrainTimeout: 5m0s - nodeVolumeDetachTimeout: 5m0s - remediationStrategy: + spec: + deletion: + nodeDeletionTimeoutSeconds: 300 + nodeDrainTimeoutSeconds: 300 + nodeVolumeDetachTimeoutSeconds: 300 + infrastructureRef: + apiGroup: infrastructure.cluster.x-k8s.io + kind: OpenStackMachineTemplate + name: RELEASE-NAME-control-plane-e39a716c + remediation: maxRetry: 3 - minHealthyPeriod: 1h - retryPeriod: 20m + minHealthyPeriodSeconds: 3600 + retryPeriodSeconds: 1200 replicas: 3 - rolloutStrategy: - rollingUpdate: - maxSurge: 1 - type: RollingUpdate + rollout: + strategy: + rollingUpdate: + maxSurge: 1 + type: RollingUpdate version: v1.29.2 68: | - apiVersion: cluster.x-k8s.io/v1beta1 + apiVersion: cluster.x-k8s.io/v1beta2 kind: MachineHealthCheck metadata: labels: @@ -2217,20 +2228,23 @@ templated manifests should match snapshot: helm.sh/chart: openstack-cluster-0.1.0 name: RELEASE-NAME-control-plane spec: + checks: + nodeStartupTimeoutSeconds: 1800 + unhealthyNodeConditions: + - status: Unknown + timeoutSeconds: 300 + type: Ready + - status: "False" + timeoutSeconds: 300 + type: Ready clusterName: RELEASE-NAME - maxUnhealthy: 1 - nodeStartupTimeout: 30m0s + remediation: + triggerIf: + unhealthyLessThanOrEqualTo: 1 selector: matchLabels: capi.stackhpc.com/cluster: RELEASE-NAME capi.stackhpc.com/component: control-plane - unhealthyConditions: - - status: Unknown - timeout: 5m0s - type: Ready - - status: "False" - timeout: 5m0s - type: Ready 69: | apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: OpenStackMachineTemplate @@ -2272,11 +2286,11 @@ templated manifests should match snapshot: kind: AdmissionConfiguration plugins: [] 71: | - apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: KubeadmConfigTemplate metadata: annotations: - capi.stackhpc.com/template-checksum: d43bd1bb079c9f949f46b9f6bf89e1e1027a45756426801398f5ab1199a331c4 + capi.stackhpc.com/template-checksum: c8e20309a264ffa642f909594d0726d6155cef467f05e575d96e2d4b40f5618b helm.sh/resource-policy: keep labels: capi.stackhpc.com/cluster: RELEASE-NAME @@ 
-2285,7 +2299,7 @@ templated manifests should match snapshot: capi.stackhpc.com/managed-by: Helm capi.stackhpc.com/node-group: group-1 helm.sh/chart: openstack-cluster-0.1.0 - name: RELEASE-NAME-group-1-d43bd1bb + name: RELEASE-NAME-group-1-c8e20309 spec: template: spec: @@ -2348,8 +2362,10 @@ templated manifests should match snapshot: joinConfiguration: nodeRegistration: kubeletExtraArgs: - cloud-provider: external - node-labels: capi.stackhpc.com/node-group=group-1 + - name: node-labels + value: capi.stackhpc.com/node-group=group-1 + - name: cloud-provider + value: external name: '{{ local_hostname }}' preKubeadmCommands: - |- @@ -2362,11 +2378,11 @@ templated manifests should match snapshot: systemctl restart containerd EOF 72: | - apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: KubeadmConfigTemplate metadata: annotations: - capi.stackhpc.com/template-checksum: e1d1a382f8640c6551c40f3da6902e8d45a774e09fe30be4898ba00a70a63a67 + capi.stackhpc.com/template-checksum: fa4874fbaed7594d52e7cdb46ecef14911802f995057be2c68d31dbbbf3e8ced helm.sh/resource-policy: keep labels: capi.stackhpc.com/cluster: RELEASE-NAME @@ -2375,7 +2391,7 @@ templated manifests should match snapshot: capi.stackhpc.com/managed-by: Helm capi.stackhpc.com/node-group: group-2 helm.sh/chart: openstack-cluster-0.1.0 - name: RELEASE-NAME-group-2-e1d1a382 + name: RELEASE-NAME-group-2-fa4874fb spec: template: spec: @@ -2438,8 +2454,10 @@ templated manifests should match snapshot: joinConfiguration: nodeRegistration: kubeletExtraArgs: - cloud-provider: external - node-labels: capi.stackhpc.com/node-group=group-2 + - name: node-labels + value: capi.stackhpc.com/node-group=group-2 + - name: cloud-provider + value: external name: '{{ local_hostname }}' preKubeadmCommands: - |- @@ -2452,7 +2470,7 @@ templated manifests should match snapshot: systemctl restart containerd EOF 73: | - apiVersion: cluster.x-k8s.io/v1beta1 + apiVersion: cluster.x-k8s.io/v1beta2 kind: MachineDeployment metadata: annotations: null @@ -2466,19 +2484,21 @@ templated manifests should match snapshot: name: RELEASE-NAME-group-1 spec: clusterName: RELEASE-NAME + deletion: + order: Random replicas: 1 + rollout: + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + type: RollingUpdate selector: matchLabels: capi.stackhpc.com/cluster: RELEASE-NAME capi.stackhpc.com/component: worker capi.stackhpc.com/node-group: group-1 cluster.x-k8s.io/cluster-name: RELEASE-NAME - strategy: - rollingUpdate: - deletePolicy: Random - maxSurge: 0 - maxUnavailable: 1 - type: RollingUpdate template: metadata: labels: @@ -2488,20 +2508,21 @@ templated manifests should match snapshot: spec: bootstrap: configRef: - apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + apiGroup: bootstrap.cluster.x-k8s.io kind: KubeadmConfigTemplate - name: RELEASE-NAME-group-1-d43bd1bb + name: RELEASE-NAME-group-1-c8e20309 clusterName: RELEASE-NAME + deletion: + nodeDeletionTimeoutSeconds: 300 + nodeDrainTimeoutSeconds: 300 + nodeVolumeDetachTimeoutSeconds: 300 infrastructureRef: - apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + apiGroup: infrastructure.cluster.x-k8s.io kind: OpenStackMachineTemplate name: RELEASE-NAME-group-1-974c4cfd - nodeDeletionTimeout: 5m0s - nodeDrainTimeout: 5m0s - nodeVolumeDetachTimeout: 5m0s version: v1.29.2 74: | - apiVersion: cluster.x-k8s.io/v1beta1 + apiVersion: cluster.x-k8s.io/v1beta2 kind: MachineDeployment metadata: annotations: @@ -2517,18 +2538,20 @@ templated manifests should match snapshot: name: 
RELEASE-NAME-group-2 spec: clusterName: RELEASE-NAME + deletion: + order: Random + rollout: + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + type: RollingUpdate selector: matchLabels: capi.stackhpc.com/cluster: RELEASE-NAME capi.stackhpc.com/component: worker capi.stackhpc.com/node-group: group-2 cluster.x-k8s.io/cluster-name: RELEASE-NAME - strategy: - rollingUpdate: - deletePolicy: Random - maxSurge: 0 - maxUnavailable: 1 - type: RollingUpdate template: metadata: labels: @@ -2538,20 +2561,21 @@ templated manifests should match snapshot: spec: bootstrap: configRef: - apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + apiGroup: bootstrap.cluster.x-k8s.io kind: KubeadmConfigTemplate - name: RELEASE-NAME-group-2-e1d1a382 + name: RELEASE-NAME-group-2-fa4874fb clusterName: RELEASE-NAME + deletion: + nodeDeletionTimeoutSeconds: 300 + nodeDrainTimeoutSeconds: 300 + nodeVolumeDetachTimeoutSeconds: 300 infrastructureRef: - apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + apiGroup: infrastructure.cluster.x-k8s.io kind: OpenStackMachineTemplate name: RELEASE-NAME-group-2-26685203 - nodeDeletionTimeout: 5m0s - nodeDrainTimeout: 5m0s - nodeVolumeDetachTimeout: 5m0s version: v1.29.2 75: | - apiVersion: cluster.x-k8s.io/v1beta1 + apiVersion: cluster.x-k8s.io/v1beta2 kind: MachineHealthCheck metadata: labels: @@ -2563,23 +2587,26 @@ templated manifests should match snapshot: helm.sh/chart: openstack-cluster-0.1.0 name: RELEASE-NAME-group-1 spec: + checks: + nodeStartupTimeoutSeconds: 1800 + unhealthyNodeConditions: + - status: Unknown + timeoutSeconds: 300 + type: Ready + - status: "False" + timeoutSeconds: 300 + type: Ready clusterName: RELEASE-NAME - maxUnhealthy: 100% - nodeStartupTimeout: 30m0s + remediation: + triggerIf: + unhealthyLessThanOrEqualTo: 100% selector: matchLabels: capi.stackhpc.com/cluster: RELEASE-NAME capi.stackhpc.com/component: worker capi.stackhpc.com/node-group: group-1 - unhealthyConditions: - - status: Unknown - timeout: 5m0s - type: Ready - - status: "False" - timeout: 5m0s - type: Ready 76: | - apiVersion: cluster.x-k8s.io/v1beta1 + apiVersion: cluster.x-k8s.io/v1beta2 kind: MachineHealthCheck metadata: labels: @@ -2591,21 +2618,24 @@ templated manifests should match snapshot: helm.sh/chart: openstack-cluster-0.1.0 name: RELEASE-NAME-group-2 spec: + checks: + nodeStartupTimeoutSeconds: 1800 + unhealthyNodeConditions: + - status: Unknown + timeoutSeconds: 300 + type: Ready + - status: "False" + timeoutSeconds: 300 + type: Ready clusterName: RELEASE-NAME - maxUnhealthy: 100% - nodeStartupTimeout: 30m0s + remediation: + triggerIf: + unhealthyLessThanOrEqualTo: 100% selector: matchLabels: capi.stackhpc.com/cluster: RELEASE-NAME capi.stackhpc.com/component: worker capi.stackhpc.com/node-group: group-2 - unhealthyConditions: - - status: Unknown - timeout: 5m0s - type: Ready - - status: "False" - timeout: 5m0s - type: Ready 77: | apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: OpenStackMachineTemplate diff --git a/charts/openstack-cluster/values.yaml b/charts/openstack-cluster/values.yaml index 1875f6c37..3b00f23af 100644 --- a/charts/openstack-cluster/values.yaml +++ b/charts/openstack-cluster/values.yaml @@ -104,6 +104,9 @@ clusterNetworking: # List of nameserver IPs to use when creating a cluster network. 
# This is only used if neither of networkFilter and subnetFilter are given dnsNameservers: + # The MTU to use if creating a cluster network + # This is only used if neither of networkFilter and subnetFilter are given + networkMTU: # Settings for registry mirrors # When a mirror is set, it will be tried for images but will fall back to the @@ -148,12 +151,16 @@ etcd: # Set timeouts so that etcd tolerates 'slowness' (network + disks) better # This is at the expense of taking longer to detect a leader failure # https://etcd.io/docs/v3.5/tuning/#time-parameters - heartbeat-interval: "500" # defaults to 100ms in etcd 3.5 - election-timeout: "5000" # defaults to 1000ms in etcd 3.5 - # Set a slightly larger space quota than the default (default is 2GB) - quota-backend-bytes: "4294967296" + - name: "election-timeout" + value: "5000" # defaults to 1000ms in etcd 3.5 + - name: "heartbeat-interval" + value: "500" # defaults to 100ms in etcd 3.5 # Listen for metrics on 0.0.0.0 so Prometheus can collect them - listen-metrics-urls: http://0.0.0.0:2381 + - name: "listen-metrics-urls" + value: "http://0.0.0.0:2381" + # Set a slightly larger space quota than the default (default is 2GB) + - name: "quota-backend-bytes" + value: "4294967296" # At-rest encryption settings encryption: enabled: false @@ -320,31 +327,32 @@ controlPlane: # Indicates whether control plane machines should use config drive or not machineConfigDrive: false # The time to wait for a node to finish draining before it can be removed - nodeDrainTimeout: 5m0s + nodeDrainTimeoutSeconds: 300 # The time to wait for a node to detach all volumes before it can be removed - nodeVolumeDetachTimeout: 5m0s + nodeVolumeDetachTimeoutSeconds: 300 # The time to wait for the node resource to be deleted in Kubernetes when a # machine is marked for deletion - nodeDeletionTimeout: 5m0s + nodeDeletionTimeoutSeconds: 300 # The remediation strategy for the control plane nodes # We set these so that we don't keep remediating an unhealthy control plane forever - remediationStrategy: + remediation: # The maximum number of times that a remediation will be retried maxRetry: 3 # The amount of time that a node created as a remediation has to become healthy # before the remediation is retried - retryPeriod: 20m + retryPeriodSeconds: 1200 # The length of time that a node must be healthy before any future problems are # considered unrelated to the previous ones (i.e. 
the retry count is reset) - minHealthyPeriod: 1h + minHealthyPeriodSeconds: 3600 # The rollout strategy to use for the control plane nodes # By default, the strategy allows the control plane to begin provisioning new nodes # without first tearing down old ones - rolloutStrategy: - type: RollingUpdate - rollingUpdate: - # For the control plane, this can only be 0 or 1 - maxSurge: 1 + rollout: + strategy: + type: RollingUpdate + rollingUpdate: + # For the control plane, this can only be 0 or 1 + maxSurge: 1 # The kubeadm config specification for the control plane # By default, this uses a simple configuration that enables the external cloud provider kubeadmConfigSpec: @@ -352,23 +360,28 @@ controlPlane: nodeRegistration: name: '{{ local_hostname }}' kubeletExtraArgs: - cloud-provider: external + - name: "cloud-provider" + value: "external" # As well as enabling an external cloud provider, we set the bind addresses for the # controller-manager, scheduler and kube-proxy to 0.0.0.0 so that Prometheus can reach # them to collect metrics clusterConfiguration: controllerManager: extraArgs: - cloud-provider: external - bind-address: 0.0.0.0 + - name: "bind-address" + value: 0.0.0.0 + - name: "cloud-provider" + value: "external" scheduler: extraArgs: - bind-address: 0.0.0.0 + - name: "bind-address" + value: 0.0.0.0 joinConfiguration: nodeRegistration: name: '{{ local_hostname }}' kubeletExtraArgs: - cloud-provider: external + - name: "cloud-provider" + value: "external" kubeProxyConfiguration: metricsBindAddress: 0.0.0.0:10249 # The machine health check for auto-healing of the control plane @@ -378,19 +391,22 @@ controlPlane: enabled: true # The spec for the health check spec: - # By default, don't remediate control plane nodes when more than one is unhealthy - maxUnhealthy: 1 - # If a node takes longer than 30 mins to startup, remediate it - nodeStartupTimeout: 30m0s - # By default, consider a control plane node that has not been Ready - # for more than 5 mins unhealthy - unhealthyConditions: - - type: Ready - status: Unknown - timeout: 5m0s - - type: Ready - status: "False" - timeout: 5m0s + checks: + # If a node takes longer than 30 mins to startup, remediate it + nodeStartupTimeoutSeconds: 1800 + # By default, consider a control plane node that has not been Ready + # for more than 5 mins unhealthy + unhealthyNodeConditions: + - type: Ready + status: Unknown + timeoutSeconds: 300 + - type: Ready + status: "False" + timeoutSeconds: 300 + remediation: + triggerIf: + # By default, don't remediate control plane nodes when more than one is unhealthy + unhealthyLessThanOrEqualTo: 1 # Defaults for node groups # Each of these can be overridden in the specification for an individual node group @@ -440,27 +456,29 @@ nodeGroupDefaults: # Indicates whether control plane machines should use config drive or not machineConfigDrive: false # The time to wait for a node to finish draining before it can be removed - nodeDrainTimeout: 5m0s + nodeDrainTimeoutSeconds: 300 # The time to wait for a node to detach all volumes before it can be removed - nodeVolumeDetachTimeout: 5m0s + nodeVolumeDetachTimeoutSeconds: 300 # The time to wait for the node resource to be deleted in Kubernetes when a # machine is marked for deletion - nodeDeletionTimeout: 5m0s + nodeDeletionTimeoutSeconds: 300 # The rollout strategy to use for the node group # By default, this is set to do a rolling update within the existing resource envelope # of the node group, even if that means the node group temporarily has zero nodes - rolloutStrategy: - 
type: RollingUpdate
-    rollingUpdate:
-      # The maximum number of node group machines that can be unavailable during the update
-      # Can be an absolute number or a percentage of the desired count
-      maxUnavailable: 1
-      # The maximum number of machines that can be scheduled above the desired count for
-      # the group during an update
-      # Can be an absolute number or a percentage of the desired count
-      maxSurge: 0
-    # One of Random, Newest, Oldest
-    deletePolicy: Random
+  rollout:
+    strategy:
+      type: RollingUpdate
+      rollingUpdate:
+        # The maximum number of node group machines that can be unavailable during the update
+        # Can be an absolute number or a percentage of the desired count
+        maxUnavailable: 1
+        # The maximum number of machines that can be scheduled above the desired count for
+        # the group during an update
+        # Can be an absolute number or a percentage of the desired count
+        maxSurge: 0
+  deletion:
+    # One of Random, Newest, Oldest
+    order: Random
   # The default kubeadm config specification for worker nodes
   # This will be merged with any configuration given for specific node groups
   # By default, this uses a simple configuration that enables the external cloud provider
@@ -469,7 +487,8 @@ nodeGroupDefaults:
     nodeRegistration:
       name: '{{ local_hostname }}'
       kubeletExtraArgs:
-        cloud-provider: external
+        - name: "cloud-provider"
+          value: "external"
   # The default machine health check for worker nodes
   # See https://cluster-api.sigs.k8s.io/tasks/healthcheck.html
   # Note that maxUnhealthy or unhealthRange are evaluated per node group
@@ -478,21 +497,24 @@ nodeGroupDefaults:
     enabled: true
     # The spec for the health check
    spec:
-      # We have a control place feature gate enabled which blocks all worker node remediation if
-      # control plane is unhealthy, so should be safe to reset worker remediation threshold to
-      # 100% without the risk of runaway remediations.
-      maxUnhealthy: 100%
-      # If a node takes longer than 30 mins to startup, remediate it
-      nodeStartupTimeout: 30m0s
-      # By default, consider a worker node that has not been Ready for
-      # more than 5 mins unhealthy
-      unhealthyConditions:
-        - type: Ready
-          status: Unknown
-          timeout: 5m0s
-        - type: Ready
-          status: "False"
-          timeout: 5m0s
+      checks:
+        # If a node takes longer than 30 mins to startup, remediate it
+        nodeStartupTimeoutSeconds: 1800
+        # By default, consider a worker node that has not been Ready
+        # for more than 5 mins unhealthy
+        unhealthyNodeConditions:
+          - type: Ready
+            status: Unknown
+            timeoutSeconds: 300
+          - type: Ready
+            status: "False"
+            timeoutSeconds: 300
+      remediation:
+        triggerIf:
+          # We have a control plane feature gate enabled which blocks all worker node remediation if
+          # the control plane is unhealthy, so it should be safe to reset the worker remediation threshold
+          # to 100% without the risk of runaway remediations.
+          unhealthyLessThanOrEqualTo: 100%
 
 # The worker node groups for the cluster
 nodeGroups:
diff --git a/dependencies.json b/dependencies.json
index eca86f816..1b0564861 100644
--- a/dependencies.json
+++ b/dependencies.json
@@ -1,10 +1,11 @@
 {
     "addon-provider": "0.11.0",
     "azimuth-images": "0.26.0",
-    "cluster-api": "v1.10.4",
+    "cluster-api": "v1.12.2",
     "cluster-api-janitor-openstack": "0.11.0",
-    "cluster-api-provider-openstack": "v0.11.3",
+    "cluster-api-provider-openstack": "v0.14.1",
     "cert-manager": "v1.20.1",
     "helm": "v3.17.3",
+    "openstack-resource-controller": "v2.4.0",
    "sonobuoy": "v0.57.3"
-}
\ No newline at end of file
+}
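
For reference, the conversion helpers added to charts/openstack-cluster/templates/_helpers.tpl are what keep existing v1beta1-style values files rendering against the new v1beta2 APIs: each convert.* helper prefers the legacy key when it is present and otherwise falls back to the new field, with duration strings passed through openstack-cluster.convert.humanTimeToSeconds. A sketch of the mapping, using the chart defaults from this change (any other XhYmZs durations convert the same way):

    # Legacy (v1beta1-style) values, still accepted after this change:
    controlPlane:
      nodeDrainTimeout: 5m0s     # rendered as deletion.nodeDrainTimeoutSeconds: 300
      remediationStrategy:
        maxRetry: 3
        retryPeriod: 20m         # rendered as remediation.retryPeriodSeconds: 1200
        minHealthyPeriod: 1h     # rendered as remediation.minHealthyPeriodSeconds: 3600
      rolloutStrategy:           # rendered under rollout.strategy
        type: RollingUpdate
        rollingUpdate:
          maxSurge: 1

    # Equivalent v1beta2-style values, used when the legacy keys are absent:
    controlPlane:
      nodeDrainTimeoutSeconds: 300
      remediation:
        maxRetry: 3
        retryPeriodSeconds: 1200
        minHealthyPeriodSeconds: 3600
      rollout:
        strategy:
          type: RollingUpdate
          rollingUpdate:
            maxSurge: 1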