From 46c211a6edf76ff85a68cdb05ffe4a3f07097a7c Mon Sep 17 00:00:00 2001 From: Hugo Shaka Date: Wed, 11 Jan 2023 14:49:08 -0500 Subject: [PATCH 1/5] helm: add wait initContainer and preStop hooks --- .../teleport-cluster/templates/_helpers.tpl | 13 ++++++ .../templates/auth/deployment.yaml | 12 +++++- .../auth/service-previous-version.yaml | 21 +++++++++- .../templates/proxy/deployment.yaml | 23 ++++++++++- .../auth_deployment_test.yaml.snap | 36 ++++++++++++++++ .../proxy_deployment_test.yaml.snap | 41 +++++++++++++++++++ .../tests/proxy_deployment_test.yaml | 36 ++++++++-------- .../chart/teleport-cluster/values.schema.json | 5 +++ examples/chart/teleport-cluster/values.yaml | 6 +++ 9 files changed, 170 insertions(+), 23 deletions(-) diff --git a/examples/chart/teleport-cluster/templates/_helpers.tpl b/examples/chart/teleport-cluster/templates/_helpers.tpl index bdfb64eb6185e..e5c22195a87e7 100644 --- a/examples/chart/teleport-cluster/templates/_helpers.tpl +++ b/examples/chart/teleport-cluster/templates/_helpers.tpl @@ -18,6 +18,10 @@ if serviceAccount is not defined or serviceAccount.name is empty, use .Release.N {{- (semver (include "teleport-cluster.version" .)).Major -}} {{- end -}} +{{- define "teleport-cluster.previousMajorVersion" -}} +{{- sub (include "teleport-cluster.majorVersion" . | atoi ) 1 -}} +{{- end -}} + {{/* Proxy selector labels */}} {{- define "teleport-cluster.proxy.selectorLabels" -}} app.kubernetes.io/name: '{{ default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}' @@ -71,6 +75,15 @@ teleport.dev/majorVersion: '{{ include "teleport-cluster.majorVersion" . }}' {{- .Release.Name | trunc 58 | trimSuffix "-" -}}-auth {{- end -}} +{{- define "teleport-cluster.auth.currentVersionServiceName" -}} +{{- .Release.Name | trunc 54 | trimSuffix "-" -}}-auth-v{{ include "teleport-cluster.majorVersion" . }} +{{- end -}} + +{{- define "teleport-cluster.auth.previousVersionServiceName" -}} +{{- .Release.Name | trunc 54 | trimSuffix "-" -}}-auth-v{{ include "teleport-cluster.previousMajorVersion" . }} +{{- end -}} + + {{/* In most places we want to use the FQDN instead of relying on Kubernetes ndots behaviour for performance reasons */}} {{- define "teleport-cluster.auth.serviceFQDN" -}} diff --git a/examples/chart/teleport-cluster/templates/auth/deployment.yaml b/examples/chart/teleport-cluster/templates/auth/deployment.yaml index 803df194c4185..3dbf8b30eb411 100644 --- a/examples/chart/teleport-cluster/templates/auth/deployment.yaml +++ b/examples/chart/teleport-cluster/templates/auth/deployment.yaml @@ -158,8 +158,17 @@ spec: periodSeconds: 5 # poll health every 5s failureThreshold: 12 # consider agent unhealthy after 60s (12 * 5s) timeoutSeconds: {{ .Values.probeTimeoutSeconds }} -{{- if .Values.postStart.command }} lifecycle: + # waiting during preStop ensures no new request will hit the Terminating pod + # on clusters using kube-proxy (kube-proxy syncs the node iptables rules every 30s) + preStop: + exec: + command: + - teleport + - wait + - duration + - 30s +{{- if .Values.postStart.command }} postStart: exec: command: {{ toYaml .Values.postStart.command | nindent 14 }} @@ -264,3 +273,4 @@ spec: priorityClassName: {{ .Values.priorityClassName }} {{- end }} serviceAccountName: {{ include "teleport-cluster.auth.serviceAccountName" . }} + terminationGracePeriodSeconds: {{ $auth.terminationGracePeriodSeconds }} diff --git a/examples/chart/teleport-cluster/templates/auth/service-previous-version.yaml b/examples/chart/teleport-cluster/templates/auth/service-previous-version.yaml index 1547e00951688..d10392489a43c 100644 --- a/examples/chart/teleport-cluster/templates/auth/service-previous-version.yaml +++ b/examples/chart/teleport-cluster/templates/auth/service-previous-version.yaml @@ -1,7 +1,7 @@ apiVersion: v1 kind: Service metadata: - name: {{ .Release.Name }}-auth-old + name: {{ include "teleport-cluster.auth.previousVersionServiceName" . }} namespace: {{ .Release.Namespace }} labels: {{- include "teleport-cluster.auth.labels" . | nindent 4 }} spec: @@ -13,4 +13,21 @@ spec: publishNotReadyAddresses: true selector: {{- include "teleport-cluster.auth.selectorLabels" . | nindent 4 }} - teleport.dev/majorVersion: {{ sub (include "teleport-cluster.majorVersion" . | atoi ) 1 | quote }} + teleport.dev/majorVersion: {{ include "teleport-cluster.previousMajorVersion" . | quote }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "teleport-cluster.auth.currentVersionServiceName" . }} + namespace: {{ .Release.Namespace }} + labels: {{- include "teleport-cluster.auth.labels" . | nindent 4 }} +spec: + # This is a headless service. Resolving it will return the list of all auth pods running the previous major version + # Proxies should not connect to auth pods from the previous major version + # Proxy rollout should be held until this headLessService does not match pods anymore. + clusterIP: "None" + # Publishing not ready addresses ensures that unhealthy or terminating pods are still accounted for + publishNotReadyAddresses: true + selector: + {{- include "teleport-cluster.auth.selectorLabels" . | nindent 4 }} + teleport.dev/majorVersion: {{ include "teleport-cluster.majorVersion" . | quote }} diff --git a/examples/chart/teleport-cluster/templates/proxy/deployment.yaml b/examples/chart/teleport-cluster/templates/proxy/deployment.yaml index fa31085bf7a0b..eca8b1ed9aa27 100644 --- a/examples/chart/teleport-cluster/templates/proxy/deployment.yaml +++ b/examples/chart/teleport-cluster/templates/proxy/deployment.yaml @@ -91,8 +91,17 @@ spec: {{- if $proxy.tolerations }} tolerations: {{- toYaml $proxy.tolerations | nindent 6 }} {{- end }} -{{- if $proxy.initContainers }} initContainers: + # wait-auth-update is responsible for holding off the proxy rollout until all auths are running the + # next major version in case of major upgrade. + - name: wait-auth-update + image: '{{ if $proxy.enterprise }}{{ $proxy.enterpriseImage }}{{ else }}{{ $proxy.image }}{{ end }}:{{ include "teleport-cluster.version" . }}' + command: + - teleport + - wait + - no-resolve + - '{{ include "teleport-cluster.auth.previousVersionServiceName" . }}.{{ .Release.Namespace }}.svc.cluster.local' +{{- if $proxy.initContainers }} {{- range $initContainer := $proxy.initContainers }} {{- if and (not $initContainer.resources) $proxy.resources }} {{- $_ := set $initContainer "resources" $proxy.resources }} @@ -161,8 +170,17 @@ spec: periodSeconds: 5 # poll health every 5s failureThreshold: 12 # consider agent unhealthy after 60s (12 * 5s) timeoutSeconds: {{ $proxy.probeTimeoutSeconds }} -{{- if $proxy.postStart.command }} lifecycle: + # waiting during preStop ensures no new request will hit the Terminating pod + # on clusters using kube-proxy (kube-proxy syncs the node iptables rules every 30s) + preStop: + exec: + command: + - teleport + - wait + - duration + - 30s +{{- if $proxy.postStart.command }} postStart: exec: command: {{ toYaml $proxy.postStart.command | nindent 14 }} @@ -224,3 +242,4 @@ spec: priorityClassName: {{ $proxy.priorityClassName }} {{- end }} serviceAccountName: {{ include "teleport-cluster.proxy.serviceAccountName" . }} + terminationGracePeriodSeconds: {{ $proxy.terminationGracePeriodSeconds }} diff --git a/examples/chart/teleport-cluster/tests/__snapshot__/auth_deployment_test.yaml.snap b/examples/chart/teleport-cluster/tests/__snapshot__/auth_deployment_test.yaml.snap index 1b8742dcde1db..74c035fed4c7c 100644 --- a/examples/chart/teleport-cluster/tests/__snapshot__/auth_deployment_test.yaml.snap +++ b/examples/chart/teleport-cluster/tests/__snapshot__/auth_deployment_test.yaml.snap @@ -32,6 +32,14 @@ should add an operator side-car when operator is enabled: - --apply-on-startup=/etc/teleport/apply-on-startup.yaml image: public.ecr.aws/gravitational/teleport:12.0.0-dev imagePullPolicy: IfNotPresent + lifecycle: + preStop: + exec: + command: + - teleport + - wait + - duration + - 30s livenessProbe: failureThreshold: 6 httpGet: @@ -60,6 +68,7 @@ should add an operator side-car when operator is enabled: - mountPath: /var/lib/teleport name: data serviceAccountName: RELEASE-NAME + terminationGracePeriodSeconds: 60 volumes: - configMap: name: RELEASE-NAME-auth @@ -137,6 +146,14 @@ should set resources when set in values: - --apply-on-startup=/etc/teleport/apply-on-startup.yaml image: public.ecr.aws/gravitational/teleport:12.0.0-dev imagePullPolicy: IfNotPresent + lifecycle: + preStop: + exec: + command: + - teleport + - wait + - duration + - 30s livenessProbe: failureThreshold: 6 httpGet: @@ -172,6 +189,7 @@ should set resources when set in values: - mountPath: /var/lib/teleport name: data serviceAccountName: RELEASE-NAME + terminationGracePeriodSeconds: 60 volumes: - configMap: name: RELEASE-NAME-auth @@ -189,6 +207,14 @@ should set securityContext when set in values: - --apply-on-startup=/etc/teleport/apply-on-startup.yaml image: public.ecr.aws/gravitational/teleport:12.0.0-dev imagePullPolicy: IfNotPresent + lifecycle: + preStop: + exec: + command: + - teleport + - wait + - duration + - 30s livenessProbe: failureThreshold: 6 httpGet: @@ -224,6 +250,7 @@ should set securityContext when set in values: - mountPath: /var/lib/teleport name: data serviceAccountName: RELEASE-NAME + terminationGracePeriodSeconds: 60 volumes: - configMap: name: RELEASE-NAME-auth @@ -251,6 +278,14 @@ should use OSS image and not mount license when enterprise is not set in values: - --apply-on-startup=/etc/teleport/apply-on-startup.yaml image: public.ecr.aws/gravitational/teleport:8.3.4 imagePullPolicy: IfNotPresent + lifecycle: + preStop: + exec: + command: + - teleport + - wait + - duration + - 30s livenessProbe: failureThreshold: 6 httpGet: @@ -279,6 +314,7 @@ should use OSS image and not mount license when enterprise is not set in values: - mountPath: /var/lib/teleport name: data serviceAccountName: RELEASE-NAME + terminationGracePeriodSeconds: 60 volumes: - configMap: name: RELEASE-NAME-auth diff --git a/examples/chart/teleport-cluster/tests/__snapshot__/proxy_deployment_test.yaml.snap b/examples/chart/teleport-cluster/tests/__snapshot__/proxy_deployment_test.yaml.snap index 437e02ead7c9a..77af94b6496c9 100644 --- a/examples/chart/teleport-cluster/tests/__snapshot__/proxy_deployment_test.yaml.snap +++ b/examples/chart/teleport-cluster/tests/__snapshot__/proxy_deployment_test.yaml.snap @@ -1,5 +1,12 @@ should provision initContainer correctly when set in values: 1: | + - command: + - teleport + - wait + - no-resolve + - RELEASE-NAME-auth-v11.NAMESPACE.svc.cluster.local + image: public.ecr.aws/gravitational/teleport:12.0.0-dev + name: wait-auth-update - args: - echo test image: alpine @@ -68,6 +75,14 @@ should set resources when set in values: - --diag-addr=0.0.0.0:3000 image: public.ecr.aws/gravitational/teleport:12.0.0-dev imagePullPolicy: IfNotPresent + lifecycle: + preStop: + exec: + command: + - teleport + - wait + - duration + - 30s livenessProbe: failureThreshold: 6 httpGet: @@ -102,7 +117,16 @@ should set resources when set in values: readOnly: true - mountPath: /var/lib/teleport name: data + initContainers: + - command: + - teleport + - wait + - no-resolve + - RELEASE-NAME-auth-v11.NAMESPACE.svc.cluster.local + image: public.ecr.aws/gravitational/teleport:12.0.0-dev + name: wait-auth-update serviceAccountName: RELEASE-NAME-proxy + terminationGracePeriodSeconds: 60 volumes: - configMap: name: RELEASE-NAME-proxy @@ -118,6 +142,14 @@ should set securityContext when set in values: - --diag-addr=0.0.0.0:3000 image: public.ecr.aws/gravitational/teleport:12.0.0-dev imagePullPolicy: IfNotPresent + lifecycle: + preStop: + exec: + command: + - teleport + - wait + - duration + - 30s livenessProbe: failureThreshold: 6 httpGet: @@ -152,7 +184,16 @@ should set securityContext when set in values: readOnly: true - mountPath: /var/lib/teleport name: data + initContainers: + - command: + - teleport + - wait + - no-resolve + - RELEASE-NAME-auth-v11.NAMESPACE.svc.cluster.local + image: public.ecr.aws/gravitational/teleport:12.0.0-dev + name: wait-auth-update serviceAccountName: RELEASE-NAME-proxy + terminationGracePeriodSeconds: 60 volumes: - configMap: name: RELEASE-NAME-proxy diff --git a/examples/chart/teleport-cluster/tests/proxy_deployment_test.yaml b/examples/chart/teleport-cluster/tests/proxy_deployment_test.yaml index 062f69f5082b3..5ad270a411a3b 100644 --- a/examples/chart/teleport-cluster/tests/proxy_deployment_test.yaml +++ b/examples/chart/teleport-cluster/tests/proxy_deployment_test.yaml @@ -229,13 +229,13 @@ tests: secret: secretName: teleport-tls - contains: - path: spec.template.spec.initContainers[0].volumeMounts + path: spec.template.spec.initContainers[1].volumeMounts content: mountPath: /etc/teleport-tls name: "teleport-tls" readOnly: true - contains: - path: spec.template.spec.initContainers[1].volumeMounts + path: spec.template.spec.initContainers[2].volumeMounts content: mountPath: /etc/teleport-tls name: "teleport-tls" @@ -271,12 +271,12 @@ tests: mountPath: /path/to/mount name: my-mount - contains: - path: spec.template.spec.initContainers[0].volumeMounts + path: spec.template.spec.initContainers[1].volumeMounts content: mountPath: /path/to/mount name: my-mount - contains: - path: spec.template.spec.initContainers[1].volumeMounts + path: spec.template.spec.initContainers[2].volumeMounts content: mountPath: /path/to/mount name: my-mount @@ -316,46 +316,46 @@ tests: - ../.lint/extra-env.yaml asserts: - contains: - path: spec.template.spec.initContainers[0].args + path: spec.template.spec.initContainers[1].args content: "echo test" - equal: - path: spec.template.spec.initContainers[0].name + path: spec.template.spec.initContainers[1].name value: "teleport-init" - equal: - path: spec.template.spec.initContainers[0].image + path: spec.template.spec.initContainers[1].image value: "alpine" - equal: - path: spec.template.spec.initContainers[0].resources.limits.cpu + path: spec.template.spec.initContainers[1].resources.limits.cpu value: 2 - equal: - path: spec.template.spec.initContainers[0].resources.limits.memory + path: spec.template.spec.initContainers[1].resources.limits.memory value: 4Gi - equal: - path: spec.template.spec.initContainers[0].resources.requests.cpu + path: spec.template.spec.initContainers[1].resources.requests.cpu value: 1 - equal: - path: spec.template.spec.initContainers[0].resources.requests.memory + path: spec.template.spec.initContainers[1].resources.requests.memory value: 2Gi - contains: - path: spec.template.spec.initContainers[1].args + path: spec.template.spec.initContainers[2].args content: "echo test2" - equal: - path: spec.template.spec.initContainers[1].name + path: spec.template.spec.initContainers[2].name value: "teleport-init2" - equal: - path: spec.template.spec.initContainers[1].image + path: spec.template.spec.initContainers[2].image value: "alpine" - equal: - path: spec.template.spec.initContainers[1].resources.limits.cpu + path: spec.template.spec.initContainers[2].resources.limits.cpu value: 2 - equal: - path: spec.template.spec.initContainers[1].resources.limits.memory + path: spec.template.spec.initContainers[2].resources.limits.memory value: 4Gi - equal: - path: spec.template.spec.initContainers[1].resources.requests.cpu + path: spec.template.spec.initContainers[2].resources.requests.cpu value: 1 - equal: - path: spec.template.spec.initContainers[1].resources.requests.memory + path: spec.template.spec.initContainers[2].resources.requests.memory value: 2Gi - matchSnapshot: path: spec.template.spec.initContainers diff --git a/examples/chart/teleport-cluster/values.schema.json b/examples/chart/teleport-cluster/values.schema.json index f12db18c53064..c42b6bd1434fd 100644 --- a/examples/chart/teleport-cluster/values.schema.json +++ b/examples/chart/teleport-cluster/values.schema.json @@ -832,6 +832,11 @@ "$id": "#/properties/probeTimeoutSeconds", "type": "integer", "default": 1 + }, + "terminationGracePeriodSeconds": { + "$id": "#/properties/terminationGracePeriodSeconds", + "type": "integer", + "default": 60 } } } diff --git a/examples/chart/teleport-cluster/values.yaml b/examples/chart/teleport-cluster/values.yaml index 5420a57aa877d..2a54ed4da8a31 100644 --- a/examples/chart/teleport-cluster/values.yaml +++ b/examples/chart/teleport-cluster/values.yaml @@ -468,3 +468,9 @@ tolerations: [] # Timeouts for the readiness and liveness probes # https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ probeTimeoutSeconds: 1 + +# Kubernetes termination grave period +# https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#hook-handler-execution +# +# This should be greater than 30 seconds as pods are waiting 30 seconds in a preStop hook. +terminationGracePeriodSeconds: 60 From ea2a536cde363d76660a4faa216139e14987094d Mon Sep 17 00:00:00 2001 From: Hugo Hervieux Date: Wed, 11 Jan 2023 18:10:41 -0500 Subject: [PATCH 2/5] bug: fix mishandling of NXDOMAIN answers in `wait no-resolve` --- tool/teleport/common/wait.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tool/teleport/common/wait.go b/tool/teleport/common/wait.go index cd72f4c1715d4..ebdbb033515cb 100644 --- a/tool/teleport/common/wait.go +++ b/tool/teleport/common/wait.go @@ -135,7 +135,7 @@ func waitNoResolve(ctx context.Context, domain string, period, timeout time.Dura func checkDomainNoResolve(domainName string) (exit bool, err error) { endpoints, err := countEndpoints(domainName) if err != nil { - dnsErr, ok := err.(*net.DNSError) + dnsErr, ok := trace.Unwrap(err).(*net.DNSError) if !ok { log.Errorf("unexpected error when resolving domain %s : %s", domainName, err) return false, trace.Wrap(err) From e29868de602225778aa22b30c2f64ca49854663a Mon Sep 17 00:00:00 2001 From: Hugo Hervieux Date: Wed, 11 Jan 2023 18:34:44 -0500 Subject: [PATCH 3/5] helm: fix auth deployment value overrides --- .../templates/auth/deployment.yaml | 58 +++++++++---------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/examples/chart/teleport-cluster/templates/auth/deployment.yaml b/examples/chart/teleport-cluster/templates/auth/deployment.yaml index 3dbf8b30eb411..7f9f4640aa7b4 100644 --- a/examples/chart/teleport-cluster/templates/auth/deployment.yaml +++ b/examples/chart/teleport-cluster/templates/auth/deployment.yaml @@ -168,30 +168,30 @@ spec: - wait - duration - 30s -{{- if .Values.postStart.command }} +{{- if $auth.postStart.command }} postStart: exec: - command: {{ toYaml .Values.postStart.command | nindent 14 }} + command: {{ toYaml $auth.postStart.command | nindent 14 }} {{- end }} -{{- if .Values.resources }} +{{- if $auth.resources }} resources: - {{- toYaml .Values.resources | nindent 10 }} + {{- toYaml $auth.resources | nindent 10 }} {{- end }} -{{- if .Values.securityContext }} - securityContext: {{- toYaml .Values.securityContext | nindent 10 }} +{{- if $auth.securityContext }} + securityContext: {{- toYaml $auth.securityContext | nindent 10 }} {{- end }} volumeMounts: -{{- if .Values.enterprise }} +{{- if $auth.enterprise }} - mountPath: /var/lib/license name: "license" readOnly: true {{- end }} -{{- if and (.Values.gcp.credentialSecretName) (eq .Values.chartMode "gcp") }} +{{- if and ($auth.gcp.credentialSecretName) (eq $auth.chartMode "gcp") }} - mountPath: /etc/teleport-secrets name: "gcp-credentials" readOnly: true {{- end }} -{{- if .Values.tls.existingCASecretName }} +{{- if $auth.tls.existingCASecretName }} - mountPath: /etc/teleport-tls-ca name: "teleport-tls-ca" readOnly: true @@ -201,10 +201,11 @@ spec: readOnly: true - mountPath: /var/lib/teleport name: "data" -{{- if .Values.extraVolumeMounts }} - {{- toYaml .Values.extraVolumeMounts | nindent 8 }} +{{- if $auth.extraVolumeMounts }} + {{- toYaml $auth.extraVolumeMounts | nindent 8 }} {{- end }} -{{ if .Values.operator.enabled }} +{{- /* Operator uses '.Values' instead of '$auth' as it will likely be moved out of the auth pods */}} +{{- if .Values.operator.enabled }} - name: "operator" image: '{{ .Values.operator.image }}:{{ include "teleport-cluster.version" . }}' imagePullPolicy: {{ .Values.imagePullPolicy }} @@ -220,16 +221,11 @@ spec: port: 8081 initialDelaySeconds: 5 periodSeconds: 10 -{{- if .Values.operator.resources }} - resources: - {{- toYaml .Values.operator.resources | nindent 10 }} -{{- end }} + {{- if .Values.operator.resources }} + resources: {{- toYaml .Values.operator.resources | nindent 10 }} + {{- end }} volumeMounts: - {{- if .Values.highAvailability.certManager.enabled }} - - mountPath: /etc/teleport-tls - name: "teleport-tls" - readOnly: true - {{- else if .Values.tls.existingSecretName }} + {{- if .Values.tls.existingSecretName }} - mountPath: /etc/teleport-tls name: "teleport-tls" readOnly: true @@ -241,36 +237,36 @@ spec: name: "data" {{ end }} volumes: -{{- if .Values.enterprise }} +{{- if $auth.enterprise }} - name: license secret: secretName: "license" {{- end }} -{{- if and (.Values.gcp.credentialSecretName) (eq .Values.chartMode "gcp") }} +{{- if and ($auth.gcp.credentialSecretName) (eq $auth.chartMode "gcp") }} - name: gcp-credentials secret: - secretName: {{ .Values.gcp.credentialSecretName | quote }} + secretName: {{ $auth.gcp.credentialSecretName | quote }} {{- end }} -{{- if .Values.tls.existingCASecretName }} +{{- if $auth.tls.existingCASecretName }} - name: teleport-tls-ca secret: - secretName: {{ .Values.tls.existingCASecretName }} + secretName: {{ $auth.tls.existingCASecretName }} {{- end }} - name: "config" configMap: name: {{ .Release.Name }}-auth - name: "data" - {{- if and (.Values.persistence.enabled) ( and (not (eq .Values.chartMode "gcp")) (not (eq .Values.chartMode "aws"))) }} + {{- if and ($auth.persistence.enabled) ( and (not (eq $auth.chartMode "gcp")) (not (eq $auth.chartMode "aws"))) }} persistentVolumeClaim: claimName: {{ if $auth.persistence.existingClaimName }}{{ $auth.persistence.existingClaimName }}{{ else }}{{ .Release.Name }}{{ end }} {{- else }} emptyDir: {} {{- end }} -{{- if .Values.extraVolumes }} - {{- toYaml .Values.extraVolumes | nindent 6 }} +{{- if $auth.extraVolumes }} + {{- toYaml $auth.extraVolumes | nindent 6 }} {{- end }} -{{- if .Values.priorityClassName }} - priorityClassName: {{ .Values.priorityClassName }} +{{- if $auth.priorityClassName }} + priorityClassName: {{ $auth.priorityClassName }} {{- end }} serviceAccountName: {{ include "teleport-cluster.auth.serviceAccountName" . }} terminationGracePeriodSeconds: {{ $auth.terminationGracePeriodSeconds }} From 8841898679602cba9bd5f11352655f5d03a10fa9 Mon Sep 17 00:00:00 2001 From: Hugo Shaka Date: Thu, 12 Jan 2023 17:00:25 -0500 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Gus Luxton --- .../templates/auth/service-previous-version.yaml | 4 +--- examples/chart/teleport-cluster/values.yaml | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/chart/teleport-cluster/templates/auth/service-previous-version.yaml b/examples/chart/teleport-cluster/templates/auth/service-previous-version.yaml index d10392489a43c..75b4b06262f04 100644 --- a/examples/chart/teleport-cluster/templates/auth/service-previous-version.yaml +++ b/examples/chart/teleport-cluster/templates/auth/service-previous-version.yaml @@ -22,9 +22,7 @@ metadata: namespace: {{ .Release.Namespace }} labels: {{- include "teleport-cluster.auth.labels" . | nindent 4 }} spec: - # This is a headless service. Resolving it will return the list of all auth pods running the previous major version - # Proxies should not connect to auth pods from the previous major version - # Proxy rollout should be held until this headLessService does not match pods anymore. + # This is a headless service. Resolving it will return the list of all auth pods running the current major version clusterIP: "None" # Publishing not ready addresses ensures that unhealthy or terminating pods are still accounted for publishNotReadyAddresses: true diff --git a/examples/chart/teleport-cluster/values.yaml b/examples/chart/teleport-cluster/values.yaml index 2a54ed4da8a31..325dfa0b03368 100644 --- a/examples/chart/teleport-cluster/values.yaml +++ b/examples/chart/teleport-cluster/values.yaml @@ -469,7 +469,7 @@ tolerations: [] # https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ probeTimeoutSeconds: 1 -# Kubernetes termination grave period +# Kubernetes termination grace period # https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#hook-handler-execution # # This should be greater than 30 seconds as pods are waiting 30 seconds in a preStop hook. From 40aab0bf69d1cd75ea89e3afd0b742d8483a1b51 Mon Sep 17 00:00:00 2001 From: Hugo Shaka Date: Fri, 13 Jan 2023 09:32:57 -0500 Subject: [PATCH 5/5] fixup! Merge branch 'master' into hugo/helm-wait-hooks --- .../__snapshot__/auth_deployment_test.yaml.snap | 9 +++++++++ .../proxy_deployment_test.yaml.snap | 17 +++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/examples/chart/teleport-cluster/tests/__snapshot__/auth_deployment_test.yaml.snap b/examples/chart/teleport-cluster/tests/__snapshot__/auth_deployment_test.yaml.snap index c670819eaeb39..62126ce3f58b0 100644 --- a/examples/chart/teleport-cluster/tests/__snapshot__/auth_deployment_test.yaml.snap +++ b/examples/chart/teleport-cluster/tests/__snapshot__/auth_deployment_test.yaml.snap @@ -131,6 +131,14 @@ should set nodeSelector when set in values: - --apply-on-startup=/etc/teleport/apply-on-startup.yaml image: public.ecr.aws/gravitational/teleport:12.0.0-dev imagePullPolicy: IfNotPresent + lifecycle: + preStop: + exec: + command: + - teleport + - wait + - duration + - 30s livenessProbe: failureThreshold: 6 httpGet: @@ -162,6 +170,7 @@ should set nodeSelector when set in values: environment: security role: bastion serviceAccountName: RELEASE-NAME + terminationGracePeriodSeconds: 60 volumes: - configMap: name: RELEASE-NAME-auth diff --git a/examples/chart/teleport-cluster/tests/__snapshot__/proxy_deployment_test.yaml.snap b/examples/chart/teleport-cluster/tests/__snapshot__/proxy_deployment_test.yaml.snap index 10f3c75625844..7d6b0148f523e 100644 --- a/examples/chart/teleport-cluster/tests/__snapshot__/proxy_deployment_test.yaml.snap +++ b/examples/chart/teleport-cluster/tests/__snapshot__/proxy_deployment_test.yaml.snap @@ -60,6 +60,14 @@ should set nodeSelector when set in values: - --diag-addr=0.0.0.0:3000 image: public.ecr.aws/gravitational/teleport:12.0.0-dev imagePullPolicy: IfNotPresent + lifecycle: + preStop: + exec: + command: + - teleport + - wait + - duration + - 30s livenessProbe: failureThreshold: 6 httpGet: @@ -87,10 +95,19 @@ should set nodeSelector when set in values: readOnly: true - mountPath: /var/lib/teleport name: data + initContainers: + - command: + - teleport + - wait + - no-resolve + - RELEASE-NAME-auth-v11.NAMESPACE.svc.cluster.local + image: public.ecr.aws/gravitational/teleport:12.0.0-dev + name: wait-auth-update nodeSelector: environment: security role: bastion serviceAccountName: RELEASE-NAME-proxy + terminationGracePeriodSeconds: 60 volumes: - configMap: name: RELEASE-NAME-proxy