diff --git a/docs/pages/reference/helm-reference/teleport-cluster.mdx b/docs/pages/reference/helm-reference/teleport-cluster.mdx index d165ee4274edb..45e012aedcda1 100644 --- a/docs/pages/reference/helm-reference/teleport-cluster.mdx +++ b/docs/pages/reference/helm-reference/teleport-cluster.mdx @@ -2323,3 +2323,46 @@ Kubernetes timeouts for the liveness and readiness probes. ```yaml probeTimeoutSeconds: 5 ``` + +## `readinessProbe` + +`readinessProbe` configures the readiness probe settings. +This can be tuned to keep proxy pods ready even when the auth is unavailable. + +The default values mark the pod unready after one minute of failing readiness probes. + +### `readinessProbe.initialDelaySeconds` + +| Type | Default value | +|-----------|---------------| +| `integer` | `5` | + +`readinessProbe.initialDelaySeconds` controls the number of seconds after the container has started before +readiness probes are initiated. More info [in the Kubernetes documentation +](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes) + +### `readinessProbe.periodSeconds` + +| Type | Default value | +|-----------|---------------| +| `integer` | `5` | + +`readinessProbe.periodSeconds` controls how often (in seconds) to perform the probe. Minimum value is 1. + +### `readinessProbe.failureThreshold` + +| Type | Default value | +|-----------|---------------| +| `integer` | `12` | + +`readinessProbe.failureThreshold` is the minimum consecutive failures for the probe to be considered failed +after having succeeded. Minimum value is 1. + +### `readinessProbe.successThreshold` + +| Type | Default value | +|-----------|---------------| +| `integer` | `1` | + +`readinessProbe.successThreshold` is the minimum consecutive successes for the probe to be considered +successful after having failed. Minimum value is 1. 
diff --git a/examples/chart/teleport-cluster/templates/auth/deployment.yaml b/examples/chart/teleport-cluster/templates/auth/deployment.yaml index aee44b69befe3..0aff6ed593874 100644 --- a/examples/chart/teleport-cluster/templates/auth/deployment.yaml +++ b/examples/chart/teleport-cluster/templates/auth/deployment.yaml @@ -177,15 +177,16 @@ spec: initialDelaySeconds: 5 # wait 5s for agent to start periodSeconds: 5 # poll health every 5s failureThreshold: 6 # consider agent unhealthy after 30s (6 * 5s) - timeoutSeconds: {{ .Values.probeTimeoutSeconds }} + timeoutSeconds: {{ $auth.probeTimeoutSeconds }} readinessProbe: httpGet: path: /readyz port: diag - initialDelaySeconds: 5 # wait 5s for agent to register - periodSeconds: 5 # poll health every 5s - failureThreshold: 12 # consider agent unhealthy after 60s (12 * 5s) - timeoutSeconds: {{ .Values.probeTimeoutSeconds }} + initialDelaySeconds: {{ $auth.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ $auth.readinessProbe.periodSeconds }} + failureThreshold: {{$auth.readinessProbe.failureThreshold}} + successThreshold: {{$auth.readinessProbe.successThreshold}} + timeoutSeconds: {{ $auth.probeTimeoutSeconds }} lifecycle: # waiting during preStop ensures no new request will hit the Terminating pod # on clusters using kube-proxy (kube-proxy syncs the node iptables rules every 30s) diff --git a/examples/chart/teleport-cluster/templates/proxy/deployment.yaml b/examples/chart/teleport-cluster/templates/proxy/deployment.yaml index 79bcd9c9f1371..a1adfa7e795f3 100644 --- a/examples/chart/teleport-cluster/templates/proxy/deployment.yaml +++ b/examples/chart/teleport-cluster/templates/proxy/deployment.yaml @@ -224,9 +224,10 @@ spec: httpGet: path: /readyz port: diag - initialDelaySeconds: 5 # wait 5s for agent to register - periodSeconds: 5 # poll health every 5s - failureThreshold: 12 # consider agent unhealthy after 60s (12 * 5s) + initialDelaySeconds: {{ $proxy.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ 
$proxy.readinessProbe.periodSeconds }} + failureThreshold: {{$proxy.readinessProbe.failureThreshold}} + successThreshold: {{$proxy.readinessProbe.successThreshold}} timeoutSeconds: {{ $proxy.probeTimeoutSeconds }} lifecycle: # waiting during preStop ensures no new request will hit the Terminating pod diff --git a/examples/chart/teleport-cluster/tests/__snapshot__/auth_deployment_test.yaml.snap b/examples/chart/teleport-cluster/tests/__snapshot__/auth_deployment_test.yaml.snap index e8fa97c4ca3b3..f6d04dacb68ce 100644 --- a/examples/chart/teleport-cluster/tests/__snapshot__/auth_deployment_test.yaml.snap +++ b/examples/chart/teleport-cluster/tests/__snapshot__/auth_deployment_test.yaml.snap @@ -25,7 +25,7 @@ port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + timeoutSeconds: 5 name: teleport ports: - containerPort: 3000 @@ -44,7 +44,8 @@ port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + successThreshold: 1 + timeoutSeconds: 5 volumeMounts: - mountPath: /etc/teleport name: config @@ -158,7 +159,7 @@ should set nodeSelector when set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + timeoutSeconds: 5 name: teleport ports: - containerPort: 3000 @@ -177,7 +178,8 @@ should set nodeSelector when set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + successThreshold: 1 + timeoutSeconds: 5 volumeMounts: - mountPath: /etc/teleport name: config @@ -255,7 +257,7 @@ should set resources when set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + timeoutSeconds: 5 name: teleport ports: - containerPort: 3000 @@ -274,7 +276,8 @@ should set resources when set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + successThreshold: 1 + timeoutSeconds: 5 resources: limits: cpu: 2 @@ -341,7 +344,7 @@ should set securityContext when set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 
1 + timeoutSeconds: 5 name: teleport ports: - containerPort: 3000 @@ -360,7 +363,8 @@ should set securityContext when set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + successThreshold: 1 + timeoutSeconds: 5 securityContext: allowPrivilegeEscalation: false privileged: false @@ -437,7 +441,7 @@ should use OSS image and not mount license when enterprise is not set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + timeoutSeconds: 5 name: teleport ports: - containerPort: 3000 @@ -456,7 +460,8 @@ should use OSS image and not mount license when enterprise is not set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + successThreshold: 1 + timeoutSeconds: 5 volumeMounts: - mountPath: /etc/teleport name: config diff --git a/examples/chart/teleport-cluster/tests/__snapshot__/proxy_deployment_test.yaml.snap b/examples/chart/teleport-cluster/tests/__snapshot__/proxy_deployment_test.yaml.snap index a84e3be766441..61dbc69acb5dc 100644 --- a/examples/chart/teleport-cluster/tests/__snapshot__/proxy_deployment_test.yaml.snap +++ b/examples/chart/teleport-cluster/tests/__snapshot__/proxy_deployment_test.yaml.snap @@ -61,7 +61,7 @@ sets clusterDomain on Deployment Pods: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + timeoutSeconds: 5 name: teleport ports: - containerPort: 3080 @@ -89,7 +89,8 @@ sets clusterDomain on Deployment Pods: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + successThreshold: 1 + timeoutSeconds: 5 volumeMounts: - mountPath: /etc/teleport name: config @@ -218,7 +219,7 @@ should set nodeSelector when set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + timeoutSeconds: 5 name: teleport ports: - containerPort: 3080 @@ -246,7 +247,8 @@ should set nodeSelector when set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + successThreshold: 1 + 
timeoutSeconds: 5 volumeMounts: - mountPath: /etc/teleport name: config @@ -330,7 +332,7 @@ should set resources for wait-auth-update initContainer when set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + timeoutSeconds: 5 name: teleport ports: - containerPort: 3080 @@ -358,7 +360,8 @@ should set resources for wait-auth-update initContainer when set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + successThreshold: 1 + timeoutSeconds: 5 resources: limits: cpu: 2 @@ -438,7 +441,7 @@ should set resources when set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + timeoutSeconds: 5 name: teleport ports: - containerPort: 3080 @@ -466,7 +469,8 @@ should set resources when set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + successThreshold: 1 + timeoutSeconds: 5 resources: limits: cpu: 2 @@ -546,7 +550,7 @@ should set securityContext for initContainers when set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + timeoutSeconds: 5 name: teleport ports: - containerPort: 3080 @@ -574,7 +578,8 @@ should set securityContext for initContainers when set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + successThreshold: 1 + timeoutSeconds: 5 securityContext: allowPrivilegeEscalation: false privileged: false @@ -654,7 +659,7 @@ should set securityContext when set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + timeoutSeconds: 5 name: teleport ports: - containerPort: 3080 @@ -682,7 +687,8 @@ should set securityContext when set in values: port: diag initialDelaySeconds: 5 periodSeconds: 5 - timeoutSeconds: 1 + successThreshold: 1 + timeoutSeconds: 5 securityContext: allowPrivilegeEscalation: false privileged: false diff --git a/examples/chart/teleport-cluster/tests/auth_deployment_test.yaml 
b/examples/chart/teleport-cluster/tests/auth_deployment_test.yaml index 49946a9500085..c619e79b81bc2 100644 --- a/examples/chart/teleport-cluster/tests/auth_deployment_test.yaml +++ b/examples/chart/teleport-cluster/tests/auth_deployment_test.yaml @@ -923,3 +923,30 @@ tests: - equal: path: spec.template.metadata.labels.baz value: overridden + + - it: sets readinessProbe values on Deployment Pods + template: auth/deployment.yaml + set: + clusterName: helm-lint + readinessProbe: + initialDelaySeconds: 9 + periodSeconds: 10 + failureThreshold: 11 + successThreshold: 12 + auth: + # we test an auth-specific override + readinessProbe: + initialDelaySeconds: 13 + asserts: + - equal: + path: spec.template.spec.containers[0].readinessProbe.periodSeconds + value: 10 + - equal: + path: spec.template.spec.containers[0].readinessProbe.failureThreshold + value: 11 + - equal: + path: spec.template.spec.containers[0].readinessProbe.successThreshold + value: 12 + - equal: + path: spec.template.spec.containers[0].readinessProbe.initialDelaySeconds + value: 13 diff --git a/examples/chart/teleport-cluster/tests/proxy_deployment_test.yaml b/examples/chart/teleport-cluster/tests/proxy_deployment_test.yaml index 8b10f191008ce..fc68406a0070a 100644 --- a/examples/chart/teleport-cluster/tests/proxy_deployment_test.yaml +++ b/examples/chart/teleport-cluster/tests/proxy_deployment_test.yaml @@ -1041,4 +1041,31 @@ tests: - matchSnapshot: {} - matchRegex: path: spec.template.spec.initContainers[0].command[3] - pattern: ".svc.test.com$" \ No newline at end of file + pattern: ".svc.test.com$" + + - it: sets readinessProbe values on Deployment Pods + template: proxy/deployment.yaml + set: + clusterName: helm-lint + readinessProbe: + initialDelaySeconds: 9 + periodSeconds: 10 + failureThreshold: 11 + successThreshold: 12 + proxy: + # we test a proxy-specific override + readinessProbe: + initialDelaySeconds: 13 + asserts: + - equal: + path: 
spec.template.spec.containers[0].readinessProbe.periodSeconds + value: 10 + - equal: + path: spec.template.spec.containers[0].readinessProbe.failureThreshold + value: 11 + - equal: + path: spec.template.spec.containers[0].readinessProbe.successThreshold + value: 12 + - equal: + path: spec.template.spec.containers[0].readinessProbe.initialDelaySeconds + value: 13 diff --git a/examples/chart/teleport-cluster/values.yaml b/examples/chart/teleport-cluster/values.yaml index 071801af9d780..7c18b414d0bd0 100644 --- a/examples/chart/teleport-cluster/values.yaml +++ b/examples/chart/teleport-cluster/values.yaml @@ -794,7 +794,24 @@ tolerations: [] # Timeouts for the readiness and liveness probes # https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ -probeTimeoutSeconds: 1 +probeTimeoutSeconds: 5 + +# readinessProbe(object) -- configures the readiness probe settings. +# This can be tuned to keep proxy pods ready even when the auth is unavailable. +# +# The default values mark the pod unready after one minute of failing readiness probes. +readinessProbe: + # readinessProbe.initialDelaySeconds(int) -- controls the number of seconds after the container has started before + # readiness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + initialDelaySeconds: 5 + # readinessProbe.periodSeconds(int) -- controls how often (in seconds) to perform the probe. Minimum value is 1. + periodSeconds: 5 + # readinessProbe.failureThreshold(int) -- is the minimum consecutive failures for the probe to be considered failed + # after having succeeded. Minimum value is 1. + failureThreshold: 12 + # readinessProbe.successThreshold(int) -- is the minimum consecutive successes for the probe to be considered + # successful after having failed. Minimum value is 1. 
+ successThreshold: 1 # Kubernetes termination grace period # https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#hook-handler-execution