Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions examples/chart/teleport-cluster/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ if serviceAccount is not defined or serviceAccount.name is empty, use .Release.N
{{- (semver (include "teleport-cluster.version" .)).Major -}}
{{- end -}}

{{/* Major version immediately preceding the current one: the output of
"teleport-cluster.majorVersion" converted to an integer, minus one. */}}
{{- define "teleport-cluster.previousMajorVersion" -}}
{{- sub (include "teleport-cluster.majorVersion" . | atoi ) 1 -}}
{{- end -}}

{{/* Proxy selector labels */}}
{{- define "teleport-cluster.proxy.selectorLabels" -}}
app.kubernetes.io/name: '{{ default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}'
Expand Down Expand Up @@ -71,6 +75,15 @@ teleport.dev/majorVersion: '{{ include "teleport-cluster.majorVersion" . }}'
{{- .Release.Name | trunc 58 | trimSuffix "-" -}}-auth
{{- end -}}

{{/* Name of the auth Service for the current major version: the release name
truncated to 54 characters (any trailing "-" removed), followed by "-auth-v" and
the output of "teleport-cluster.majorVersion". */}}
{{- define "teleport-cluster.auth.currentVersionServiceName" -}}
{{- .Release.Name | trunc 54 | trimSuffix "-" -}}-auth-v{{ include "teleport-cluster.majorVersion" . }}
{{- end -}}

{{/* Name of the auth Service for the previous major version: the release name
truncated to 54 characters (any trailing "-" removed), followed by "-auth-v" and
the output of "teleport-cluster.previousMajorVersion". */}}
{{- define "teleport-cluster.auth.previousVersionServiceName" -}}
{{- .Release.Name | trunc 54 | trimSuffix "-" -}}-auth-v{{ include "teleport-cluster.previousMajorVersion" . }}
{{- end -}}


{{/* In most places we want to use the FQDN instead of relying on Kubernetes ndots behaviour
for performance reasons */}}
{{- define "teleport-cluster.auth.serviceFQDN" -}}
Expand Down
68 changes: 37 additions & 31 deletions examples/chart/teleport-cluster/templates/auth/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -161,31 +161,40 @@ spec:
periodSeconds: 5 # poll health every 5s
failureThreshold: 12 # consider agent unhealthy after 60s (12 * 5s)
timeoutSeconds: {{ .Values.probeTimeoutSeconds }}
{{- if .Values.postStart.command }}
lifecycle:
# waiting during preStop ensures no new request will hit the Terminating pod
# on clusters using kube-proxy (kube-proxy syncs the node iptables rules every 30s)
preStop:
exec:
command:
- teleport
- wait
- duration
- 30s
{{- if $auth.postStart.command }}
postStart:
exec:
command: {{ toYaml .Values.postStart.command | nindent 14 }}
command: {{ toYaml $auth.postStart.command | nindent 14 }}
{{- end }}
{{- if .Values.resources }}
{{- if $auth.resources }}
resources:
{{- toYaml .Values.resources | nindent 10 }}
{{- toYaml $auth.resources | nindent 10 }}
{{- end }}
{{- if .Values.securityContext }}
securityContext: {{- toYaml .Values.securityContext | nindent 10 }}
{{- if $auth.securityContext }}
securityContext: {{- toYaml $auth.securityContext | nindent 10 }}
{{- end }}
volumeMounts:
{{- if .Values.enterprise }}
{{- if $auth.enterprise }}
- mountPath: /var/lib/license
name: "license"
readOnly: true
{{- end }}
{{- if and (.Values.gcp.credentialSecretName) (eq .Values.chartMode "gcp") }}
{{- if and ($auth.gcp.credentialSecretName) (eq $auth.chartMode "gcp") }}
- mountPath: /etc/teleport-secrets
name: "gcp-credentials"
readOnly: true
{{- end }}
{{- if .Values.tls.existingCASecretName }}
{{- if $auth.tls.existingCASecretName }}
- mountPath: /etc/teleport-tls-ca
name: "teleport-tls-ca"
readOnly: true
Expand All @@ -195,10 +204,11 @@ spec:
readOnly: true
- mountPath: /var/lib/teleport
name: "data"
{{- if .Values.extraVolumeMounts }}
{{- toYaml .Values.extraVolumeMounts | nindent 8 }}
{{- if $auth.extraVolumeMounts }}
{{- toYaml $auth.extraVolumeMounts | nindent 8 }}
{{- end }}
{{ if .Values.operator.enabled }}
{{- /* Operator uses '.Values' instead of '$auth' as it will likely be moved out of the auth pods */}}
{{- if .Values.operator.enabled }}
- name: "operator"
image: '{{ .Values.operator.image }}:{{ include "teleport-cluster.version" . }}'
imagePullPolicy: {{ .Values.imagePullPolicy }}
Expand All @@ -214,16 +224,11 @@ spec:
port: 8081
initialDelaySeconds: 5
periodSeconds: 10
{{- if .Values.operator.resources }}
resources:
{{- toYaml .Values.operator.resources | nindent 10 }}
{{- end }}
{{- if .Values.operator.resources }}
resources: {{- toYaml .Values.operator.resources | nindent 10 }}
{{- end }}
volumeMounts:
{{- if .Values.highAvailability.certManager.enabled }}
- mountPath: /etc/teleport-tls
name: "teleport-tls"
readOnly: true
{{- else if .Values.tls.existingSecretName }}
{{- if .Values.tls.existingSecretName }}
- mountPath: /etc/teleport-tls
name: "teleport-tls"
readOnly: true
Expand All @@ -235,35 +240,36 @@ spec:
name: "data"
{{ end }}
volumes:
{{- if .Values.enterprise }}
{{- if $auth.enterprise }}
- name: license
secret:
secretName: "license"
{{- end }}
{{- if and (.Values.gcp.credentialSecretName) (eq .Values.chartMode "gcp") }}
{{- if and ($auth.gcp.credentialSecretName) (eq $auth.chartMode "gcp") }}
- name: gcp-credentials
secret:
secretName: {{ .Values.gcp.credentialSecretName | quote }}
secretName: {{ $auth.gcp.credentialSecretName | quote }}
{{- end }}
{{- if .Values.tls.existingCASecretName }}
{{- if $auth.tls.existingCASecretName }}
- name: teleport-tls-ca
secret:
secretName: {{ .Values.tls.existingCASecretName }}
secretName: {{ $auth.tls.existingCASecretName }}
{{- end }}
- name: "config"
configMap:
name: {{ .Release.Name }}-auth
- name: "data"
{{- if and (.Values.persistence.enabled) ( and (not (eq .Values.chartMode "gcp")) (not (eq .Values.chartMode "aws"))) }}
{{- if and ($auth.persistence.enabled) ( and (not (eq $auth.chartMode "gcp")) (not (eq $auth.chartMode "aws"))) }}
persistentVolumeClaim:
claimName: {{ if $auth.persistence.existingClaimName }}{{ $auth.persistence.existingClaimName }}{{ else }}{{ .Release.Name }}{{ end }}
{{- else }}
emptyDir: {}
{{- end }}
{{- if .Values.extraVolumes }}
{{- toYaml .Values.extraVolumes | nindent 6 }}
{{- if $auth.extraVolumes }}
{{- toYaml $auth.extraVolumes | nindent 6 }}
{{- end }}
{{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }}
{{- if $auth.priorityClassName }}
priorityClassName: {{ $auth.priorityClassName }}
{{- end }}
serviceAccountName: {{ include "teleport-cluster.auth.serviceAccountName" . }}
terminationGracePeriodSeconds: {{ $auth.terminationGracePeriodSeconds }}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
apiVersion: v1
kind: Service
metadata:
name: {{ .Release.Name }}-auth-old
name: {{ include "teleport-cluster.auth.previousVersionServiceName" . }}
namespace: {{ .Release.Namespace }}
labels: {{- include "teleport-cluster.auth.labels" . | nindent 4 }}
spec:
Expand All @@ -13,4 +13,19 @@ spec:
publishNotReadyAddresses: true
selector:
{{- include "teleport-cluster.auth.selectorLabels" . | nindent 4 }}
teleport.dev/majorVersion: {{ sub (include "teleport-cluster.majorVersion" . | atoi ) 1 | quote }}
teleport.dev/majorVersion: {{ include "teleport-cluster.previousMajorVersion" . | quote }}
---
# Service selecting the auth pods whose teleport.dev/majorVersion label equals the chart's current major version.
apiVersion: v1
kind: Service
metadata:
name: {{ include "teleport-cluster.auth.currentVersionServiceName" . }}
namespace: {{ .Release.Namespace }}
labels: {{- include "teleport-cluster.auth.labels" . | nindent 4 }}
spec:
# This is a headless service. Resolving it will return the list of all auth pods running the current major version
clusterIP: "None"
# Publishing not ready addresses ensures that unhealthy or terminating pods are still accounted for
publishNotReadyAddresses: true
selector:
{{- include "teleport-cluster.auth.selectorLabels" . | nindent 4 }}
# The version label is quoted so that it renders as a string rather than an integer.
teleport.dev/majorVersion: {{ include "teleport-cluster.majorVersion" . | quote }}
23 changes: 21 additions & 2 deletions examples/chart/teleport-cluster/templates/proxy/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,17 @@ spec:
{{- if $proxy.tolerations }}
tolerations: {{- toYaml $proxy.tolerations | nindent 6 }}
{{- end }}
{{- if $proxy.initContainers }}
initContainers:
# wait-auth-update is responsible for holding off the proxy rollout until all auths are running the
# next major version in case of major upgrade.
- name: wait-auth-update
image: '{{ if $proxy.enterprise }}{{ $proxy.enterpriseImage }}{{ else }}{{ $proxy.image }}{{ end }}:{{ include "teleport-cluster.version" . }}'
command:
- teleport
- wait
- no-resolve
- '{{ include "teleport-cluster.auth.previousVersionServiceName" . }}.{{ .Release.Namespace }}.svc.cluster.local'
{{- if $proxy.initContainers }}
{{- range $initContainer := $proxy.initContainers }}
{{- if and (not $initContainer.resources) $proxy.resources }}
{{- $_ := set $initContainer "resources" $proxy.resources }}
Expand Down Expand Up @@ -164,8 +173,17 @@ spec:
periodSeconds: 5 # poll health every 5s
failureThreshold: 12 # consider agent unhealthy after 60s (12 * 5s)
timeoutSeconds: {{ $proxy.probeTimeoutSeconds }}
{{- if $proxy.postStart.command }}
lifecycle:
# waiting during preStop ensures no new request will hit the Terminating pod
# on clusters using kube-proxy (kube-proxy syncs the node iptables rules every 30s)
preStop:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if, in the future, we could also use this to have proxies unregister themselves on shutdown, to mitigate #20057.

Copy link
Copy Markdown
Contributor Author

@hugoShaka hugoShaka Jan 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unregistering would be a nice improvement, but I'm not sure preStop is the best place because it is implementation-specific (Terraform/CloudFormation have the same issue), and we don't have control over the concurrency with preStop. Maybe the proxy will take 2 minutes to shut down, and I don't know what would happen if it gets unregistered while it has not finished doing its job. IMO the best place to do it would be in the graceful shutdown sequence.

Agendas are a little packed right now because of v12, but I'll raise this issue to folks who know more than me about registration and shutdown sequence once things settle down.

exec:
command:
- teleport
- wait
- duration
- 30s
{{- if $proxy.postStart.command }}
postStart:
exec:
command: {{ toYaml $proxy.postStart.command | nindent 14 }}
Expand Down Expand Up @@ -227,3 +245,4 @@ spec:
priorityClassName: {{ $proxy.priorityClassName }}
{{- end }}
serviceAccountName: {{ include "teleport-cluster.proxy.serviceAccountName" . }}
terminationGracePeriodSeconds: {{ $proxy.terminationGracePeriodSeconds }}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@ should add an operator side-car when operator is enabled:
- --apply-on-startup=/etc/teleport/apply-on-startup.yaml
image: public.ecr.aws/gravitational/teleport:12.0.0-dev
imagePullPolicy: IfNotPresent
lifecycle:
preStop:
exec:
command:
- teleport
- wait
- duration
- 30s
livenessProbe:
failureThreshold: 6
httpGet:
Expand Down Expand Up @@ -60,6 +68,7 @@ should add an operator side-car when operator is enabled:
- mountPath: /var/lib/teleport
name: data
serviceAccountName: RELEASE-NAME
terminationGracePeriodSeconds: 60
volumes:
- configMap:
name: RELEASE-NAME-auth
Expand Down Expand Up @@ -122,6 +131,14 @@ should set nodeSelector when set in values:
- --apply-on-startup=/etc/teleport/apply-on-startup.yaml
image: public.ecr.aws/gravitational/teleport:12.0.0-dev
imagePullPolicy: IfNotPresent
lifecycle:
preStop:
exec:
command:
- teleport
- wait
- duration
- 30s
livenessProbe:
failureThreshold: 6
httpGet:
Expand Down Expand Up @@ -153,6 +170,7 @@ should set nodeSelector when set in values:
environment: security
role: bastion
serviceAccountName: RELEASE-NAME
terminationGracePeriodSeconds: 60
volumes:
- configMap:
name: RELEASE-NAME-auth
Expand Down Expand Up @@ -185,6 +203,14 @@ should set resources when set in values:
- --apply-on-startup=/etc/teleport/apply-on-startup.yaml
image: public.ecr.aws/gravitational/teleport:12.0.0-dev
imagePullPolicy: IfNotPresent
lifecycle:
preStop:
exec:
command:
- teleport
- wait
- duration
- 30s
livenessProbe:
failureThreshold: 6
httpGet:
Expand Down Expand Up @@ -220,6 +246,7 @@ should set resources when set in values:
- mountPath: /var/lib/teleport
name: data
serviceAccountName: RELEASE-NAME
terminationGracePeriodSeconds: 60
volumes:
- configMap:
name: RELEASE-NAME-auth
Expand All @@ -237,6 +264,14 @@ should set securityContext when set in values:
- --apply-on-startup=/etc/teleport/apply-on-startup.yaml
image: public.ecr.aws/gravitational/teleport:12.0.0-dev
imagePullPolicy: IfNotPresent
lifecycle:
preStop:
exec:
command:
- teleport
- wait
- duration
- 30s
livenessProbe:
failureThreshold: 6
httpGet:
Expand Down Expand Up @@ -272,6 +307,7 @@ should set securityContext when set in values:
- mountPath: /var/lib/teleport
name: data
serviceAccountName: RELEASE-NAME
terminationGracePeriodSeconds: 60
volumes:
- configMap:
name: RELEASE-NAME-auth
Expand Down Expand Up @@ -299,6 +335,14 @@ should use OSS image and not mount license when enterprise is not set in values:
- --apply-on-startup=/etc/teleport/apply-on-startup.yaml
image: public.ecr.aws/gravitational/teleport:8.3.4
imagePullPolicy: IfNotPresent
lifecycle:
preStop:
exec:
command:
- teleport
- wait
- duration
- 30s
livenessProbe:
failureThreshold: 6
httpGet:
Expand Down Expand Up @@ -327,6 +371,7 @@ should use OSS image and not mount license when enterprise is not set in values:
- mountPath: /var/lib/teleport
name: data
serviceAccountName: RELEASE-NAME
terminationGracePeriodSeconds: 60
volumes:
- configMap:
name: RELEASE-NAME-auth
Expand Down
Loading