diff --git a/docs/config.json b/docs/config.json index a18fb1b26d7d1..8d067e4314003 100644 --- a/docs/config.json +++ b/docs/config.json @@ -726,6 +726,21 @@ "forScopes": [ "enterprise" ] + }, + { + "title": "Self-hosted automatic updates", + "slug": "/management/operations/self-hosted-automatic-agent-updates/", + "forScopes": [ + "enterprise" + ] + }, + { + "title": "Enroll agent in automatic updates", + "slug": "/management/operations/enroll-agent-into-automatic-updates/", + "forScopes": [ + "enterprise", + "cloud" + ] } ] }, @@ -1586,6 +1601,11 @@ { "title": "Proxy Peering (Preview)", "slug": "/architecture/proxy-peering/" + }, + { + "title": "Agent Update Management", + "slug": "/architecture/agent-update-management/", + "forScopes": ["enterprise", "cloud"] } ] }, diff --git a/docs/cspell.json b/docs/cspell.json index fb8a675babcf5..c6997a1db3255 100644 --- a/docs/cspell.json +++ b/docs/cspell.json @@ -511,6 +511,7 @@ "nameid", "nameidentifier", "nameserver", + "namespacedname", "navin", "ndots", "netcat", @@ -627,6 +628,7 @@ "setspn", "sharded", "signup", + "skipreconcile", "slacktokenfromsecret", "snowsql", "splunkd", diff --git a/docs/pages/architecture/agent-update-management.mdx b/docs/pages/architecture/agent-update-management.mdx new file mode 100644 index 0000000000000..95a8079af7e96 --- /dev/null +++ b/docs/pages/architecture/agent-update-management.mdx @@ -0,0 +1,95 @@ +--- +title: Agent Update Management (Preview) +description: This chapter explains how Teleport automatic agent updates work. +--- + +While many Teleport resources [support agentless +mode](../faq.mdx#can-teleport-be-deployed-in-agentless-mode), agent deployments +are sometimes simpler and more convenient. However, large Teleport deployments +can create an additional burden: updating all agents. + +Starting with version 13, Teleport supports automatic agent updates for systemd-based +Linux distributions using `apt` or `yum` package managers, and Kubernetes clusters. 
+ +## Update logic and failure modes + +An updater is a piece of software deployed next to an agent which is responsible +for updating it. Updating multiple agents requires multiple updaters. + +We designed the updater to be as decoupled from Teleport as possible. The +updater can update agents even when they cannot join the Teleport cluster. +Pushing a broken version can happen, but a rollback/roll-forward must always +be possible without manually connecting to the resource and fixing the agent. + +The updater recurrently fetches the target version from a version server and +updates the agent to the target version. Because restarting the agent can +disrupt currently open sessions, it will only update the agent in two cases: +during a maintenance window or when the agent is unhealthy. + +When enrolled in a cluster with automatic updates, the agent will retrieve +its maintenance schedule from the Teleport cluster and save it. When a +maintenance schedule is available, the updater will honour it. However, if +the updater cannot find the maintenance schedule, it will consider the agent +unhealthy and perform updates as soon as possible. Similarly, if the updater +detects the agent is unhealthy, it immediately applies any pending update to +try to recover from a degraded state. + +We implemented an additional failsafe: the critical maintenance toggle. +The version server can specify that an update is critical. Critical updates are +applied even if the updater is outside its regular maintenance window. + +## Security + +When updating the agent, the updater will ensure the new version's authenticity +before deploying it. On Linux distributions using `apt` or `yum`, it relies on +the existing package signature system. On Kubernetes-based environments, it +validates the OCI image signature (using [cosign's signature +](https://github.com/sigstore/cosign/blob/main/specs/SIGNATURE_SPEC.md)). 
+ +## Version server and source of truth + +The agent version is subject to the following constraints: + +- the agent must never exceed the Proxy or Auth Service version, +- the agent must always be no more than one major version below the Proxy or Auth + Service version. + +The best practice is to always align the agent version with the Proxy and Auth +ones. To upgrade Auth and Proxy, follow [the Teleport Cluster upgrade guide +](../management/operations/upgrading.mdx). + +For this reason, all updaters must subscribe to a release channel targeting +versions that are compatible with their Teleport cluster. Teleport Cloud users +must use the Teleport Cloud version server with the `stable/cloud` release +channel. Self-hosted Teleport users must host their own version server and +update their release channel each time they update their Auth and Proxy +instances. + +### Teleport Cloud + +Teleport Cloud users can use Teleport Cloud's version server only if their +instance is enrolled in automatic updates. This version server will always +target the best version from a feature, compatibility, security and stability +point of view. + +Teleport Cloud users whose control plane is not automatically updated must not use +automatic agent updates. This is because their Teleport instance version might +differ from the other Teleport Cloud instances and might not yet support the +latest agent version. + +### Self-hosted Teleport + +Self-hosted Teleport users can set up automatic agent updates. They must host +their version server and choose their target version. They are responsible for +ensuring the targeted version is compatible with their current auth/proxy +versions. They must also monitor the agent's health and rollout status to +ensure every agent is healthy and running the correct version. + +## Next steps + +Self-hosted users must first [set up self-hosted automatic agent update +](../management/operations/self-hosted-automatic-agent-updates.mdx). 
+ +If you're a Teleport Cloud user or self-hosting with automatic update +configured, you can [enroll your agents into automatic updates +](../management/operations/enroll-agent-into-automatic-updates.mdx). diff --git a/docs/pages/architecture/introduction.mdx b/docs/pages/architecture/introduction.mdx index 1baad23fdf003..362de37e3aea5 100644 --- a/docs/pages/architecture/introduction.mdx +++ b/docs/pages/architecture/introduction.mdx @@ -9,9 +9,10 @@ works. - [Authentication](./authentication.mdx) - [Authorization](./authorization.mdx) +- [Automatic Agent Update](./agent-update-management.mdx) - [The Teleport Proxy Service](./proxy.mdx) +- [Trusted Clusters](./trustedclusters.mdx) - [Teleport Nodes](./nodes.mdx) - [Session Recording](./session-recording.mdx) - [TLS Routing](./tls-routing.mdx) - [Proxy Peering](./proxy-peering.mdx) -- [Trusted Clusters](./trustedclusters.mdx) diff --git a/docs/pages/includes/cluster-maintenance-config-spec.mdx b/docs/pages/includes/cluster-maintenance-config-spec.mdx new file mode 100644 index 0000000000000..234f2193ed012 --- /dev/null +++ b/docs/pages/includes/cluster-maintenance-config-spec.mdx @@ -0,0 +1,16 @@ +```yaml +kind: cluster_maintenance_config +spec: + agent_upgrades: + # Maintenance window start hour in UTC. + # The maintenance window lasts 1 hour. + utc_start_hour: 2 + # Week days when maintenance is allowed + # Possible values are: + # - Short names: Sun, Mon, Tue, Wed, Thu, Fri, Sat + # - Long names: Sunday, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday + weekdays: + - Mon + - Wed + - Fri +``` diff --git a/docs/pages/management/operations.mdx b/docs/pages/management/operations.mdx index 923cb1b5c44e1..def8f0267184b 100644 --- a/docs/pages/management/operations.mdx +++ b/docs/pages/management/operations.mdx @@ -16,3 +16,5 @@ the [Cluster Administration Guides](./admin.mdx) section. - [CA Rotation](./operations/ca-rotation.mdx): Rotating Teleport certificate authorities. 
- [TLS Routing Migration](./operations/tls-routing.mdx): Migrating your Teleport cluster to single-port TLS routing mode. - [Proxy Peering Migration](./operations/proxy-peering.mdx): Migrating your Teleport cluster to Proxy Peering mode. +- [Set up automatic agent updates for self-hosted Teleport](./operations/self-hosted-automatic-agent-updates.mdx) +- [Enroll agent into automatic updates](./operations/enroll-agent-into-automatic-updates.mdx) diff --git a/docs/pages/management/operations/enroll-agent-into-automatic-updates.mdx b/docs/pages/management/operations/enroll-agent-into-automatic-updates.mdx new file mode 100644 index 0000000000000..40d67a24a158b --- /dev/null +++ b/docs/pages/management/operations/enroll-agent-into-automatic-updates.mdx @@ -0,0 +1,261 @@ +--- +title: Enroll an agent into automatic updates (Preview) +description: How to enroll an agent into automatic updates +--- + +
+ Automatic agent update is available starting from Teleport `13.0`. +
+ +Teleport supports automatic agent updates for +systemd-based Linux distributions using `apt` or `yum` package managers, +and Kubernetes clusters. The [automatic updates architecture +page](../../architecture/agent-update-management.mdx) describes how agent +updating works. + +This guide explains how to enroll an existing Teleport agent into automatic +updates. + +## Requirements + + + +- A Teleport agent, either: + - started via systemd on a distribution using the `apt` or `yum` package managers + - deployed with the `teleport-kube-agent` Helm chart +- automatic update infrastructure set up. For Self-Hosted users this means you + already followed [this guide](./self-hosted-automatic-agent-updates.mdx) and + know your version server URL and release channel + + +- A Teleport agent, either: + - started via systemd on a distribution using the `apt` or `yum` package managers + - deployed with the `teleport-kube-agent` Helm chart +- as a Teleport Cloud user, you must check if your Cloud tenant is enrolled + into automatic updates. + + + +## Enroll instructions + + + + +Create the upgrade configuration directory: + +```code +$ mkdir -p /etc/teleport-upgrade.d/ +``` + +If you changed the agent user to run as non-root, create +`/etc/teleport-upgrade.d/schedule` and grant ownership to your Teleport user. 
+Else, you can skip this step: + +```code +$ touch /etc/teleport-upgrade.d/schedule +$ chown /etc/teleport-upgrade.d/schedule +``` + +Configure the updater to connect to your custom version server and subscribe +to the right release channel: + +```code +$ echo / > /etc/teleport-upgrade.d/endpoint +``` + +Install the `teleport-ent-updater` package (note: your teleport agent will be restarted during install): + +```code +$ apt install teleport-ent-updater +# or +$ yum install teleport-ent-updater +``` + +Finally, verify that the updater can see your version endpoint: + +```code +$ teleport-upgrade dry-run +``` + +You should see one of the following messages, depending on the target version +you are currently serving: + +```text +no upgrades available (1.2.3 == 1.2.3) +an upgrade is available (1.2.3 -> 2.3.4) +``` + + +`teleport-upgrade` may complain about not having a valid upgrade schedule. +This is expected immediately after install as the maintenance schedule might +not be exported yet. + + + + + + +Add the following chart values to your existing agent `values.yaml`: + +```yaml +updater: + enabled: true + versionServer: https:// + releaseChannel: +``` + +Update the Helm chart release with the new values by running `helm upgrade`. + +You can validate the updater is running properly by checking if its pod is ready: + +```code +$ kubectl get pods +NAME READY STATUS RESTARTS AGE +my-agent-0 1/1 Running 0 14m +my-agent-1 1/1 Running 0 14m +my-agent-2 1/1 Running 0 14m +my-agent-updater-d9f97f5dd-v57g9 1/1 Running 0 16m +``` + +And by consulting its logs: + +```code +$ kubectl logs -updater +2023-04-28T13:13:30Z INFO StatefulSet is already up-to-date, not updating. 
{"controller": "statefulset", "controllerGroup": "apps", "controllerKind": "StatefulSet", "StatefulSet": {"name":"my-agent","namespace":"agent"}, "namespace": "agent", "name": "my-agent", "reconcileID": "10419f20-a4c9-45d4-a16f-406866b7fc05", "namespacedname": "agent/my-agent", "kind": "StatefulSet", "err": "no new version (current: \"v12.2.3\", next: \"v12.2.3\")"} +``` + + + + +If you changed the agent user to run as non-root, create +`/etc/teleport-upgrade.d/schedule` and grant ownership to your Teleport user. +Else, you can skip this step: + +```code +$ mkdir -p /etc/teleport-upgrade.d/ +$ touch /etc/teleport-upgrade.d/schedule +$ chown /etc/teleport-upgrade.d/schedule +``` + +Install the `teleport-ent-updater` package (note: your teleport agent will be restarted during install): + +```code +$ apt install teleport-ent-updater +# or +$ yum install teleport-ent-updater +``` + +Finally, verify that the updater can see your version endpoint: + +```code +$ teleport-upgrade dry-run +``` + +You should see one of the following messages, depending on the target version +you are currently serving: + +```text +no upgrades available (1.2.3 == 1.2.3) +an upgrade is available (1.2.3 -> 2.3.4) +``` + + +`teleport-upgrade` may complain about not having a valid upgrade schedule. +This is expected immediately after install as the maintenance schedule might +not be exported yet. + + + + + +Add the following chart values to your existing agent `values.yaml`: + +```yaml +updater: + enabled: true +``` + +Update the Helm chart release with the new values by running `helm upgrade`. 
+ +You can validate the updater is running properly by checking if its pod is ready: + +```code +$ kubectl get pods +NAME READY STATUS RESTARTS AGE +my-agent-0 1/1 Running 0 14m +my-agent-1 1/1 Running 0 14m +my-agent-2 1/1 Running 0 14m +my-agent-updater-d9f97f5dd-v57g9 1/1 Running 0 16m +``` + +And by consulting its logs: + +```code +$ kubectl logs -updater +2023-04-28T13:13:30Z INFO StatefulSet is already up-to-date, not updating. {"controller": "statefulset", "controllerGroup": "apps", "controllerKind": "StatefulSet", "StatefulSet": {"name":"my-agent","namespace":"agent"}, "namespace": "agent", "name": "my-agent", "reconcileID": "10419f20-a4c9-45d4-a16f-406866b7fc05", "namespacedname": "agent/my-agent", "kind": "StatefulSet", "err": "no new version (current: \"v12.2.3\", next: \"v12.2.3\")"} +``` + + + + +## Troubleshooting + +If the agent is not automatically updated, you can: + + + + +Manually invoke the updater and look at its logs. + +```code +$ teleport-upgrade +``` + + + + +Consult the `teleport-kube-agent-updater` logs: + +```code +$ kubectl logs -updater +``` + + +The Kubernetes updater responds to events, or is woken up every 30 minutes. +If you don't want to wait until the next reconciliation, you can trigger an +event. Any deployment update will send an event, so the updater can be +triggered by annotating the resource: + +```code +kubectl annotate statefulset/ 'debug.teleport.dev/trigger-event=1' +``` + + + + +## Next steps + +You can temporarily suspend automatic updates for an agent: + + + + +Disable the systemd timer: + +```code +$ systemctl disable --now teleport-upgrade.timer +``` + + + +Annotate the agent deployment with `teleport.dev/skipreconcile: "true"`, +either by setting the `annotations.deployment` value in Helm or by patching +the deployment directly with `kubectl`. 
+ + diff --git a/docs/pages/management/operations/self-hosted-automatic-agent-updates.mdx b/docs/pages/management/operations/self-hosted-automatic-agent-updates.mdx new file mode 100644 index 0000000000000..89c070fd1d977 --- /dev/null +++ b/docs/pages/management/operations/self-hosted-automatic-agent-updates.mdx @@ -0,0 +1,118 @@ +--- +title: Setting up self-hosted automatic agent updates (Preview) +description: How to set up automatic agent updates for self-hosted Teleport +--- + +
+ Automatic agent update is available starting from Teleport `13.0`. +
+ +Teleport supports automatic agent updates for +systemd-based Linux distributions using `apt` or `yum` package managers, +and Kubernetes clusters. The [automatic updates architecture +page](../../architecture/agent-update-management.mdx) describes how agent +updating works. + +This guide covers how to set up the automatic update infrastructure. If this is +already done, or you are a Teleport Cloud user, you can directly +[enroll agents into automatic updates](./enroll-agent-into-automatic-updates.mdx). + +## Requirements + +- Self-hosted Teleport cluster running. +- `tctl` execution on the auth machine or a role allowing verbs `create`, `read`, + `update`, `delete` on the resource `cluster_maintenance_config`. +- Either: + - a public S3/GCS bucket, + - a web server accessible from all agents with valid TLS certificates. + +## Step 1/3. Create release channel files + +A release channel contains two pieces of information: the targeted version +and if the update is critical. Updaters subscribe to a release channel and will +update to the provided version during a maintenance window if possible. If the +update is critical, updaters will ignore the maintenance schedule and update as +soon as possible. + +Create a directory for the new release channel `current`. + +```code +$ mkdir current/ +``` + +Make the `current` release channel target the version (=teleport.version=): + +```code +$ echo -n "(=teleport.version=)" > current/version +``` + +And mark the update as not critical: + +```code +$ echo -n "no" > current/critical +``` + +## Step 2/3. Host the release channel files on the version server + +The release channel must be hosted on a webserver with trusted TLS certificates +and reachable by all agents. + +Public cloud buckets like Amazon S3 or Google Cloud Storage are good +candidates as they provide reliable low-maintenance file hosting. + +You can also serve the files with a regular webserver like `nginx`. +The release channel needs to be served over HTTPS. 
+ +Choose a way to serve the release channel and set up the hosting. + +The webserver must answer the following queries: + +```code +$ curl https:///current/version +(=teleport.version=) + +$ curl https:///current/critical +no +``` + +The web server serving the release channel is called the version server. +Save the version server domain and release channel name (here respectively +`` and `current`) as they will be required +later to configure the agent updaters. + +## Step 3/3. Configure the maintenance schedule + +At this point the updaters can be configured to pull the version from the +release channel and update the agents. However, they still don't know when +they should perform updates. + +Agents can retrieve the maintenance schedule from the Teleport cluster and +pass it to the updater. In this step you'll configure the maintenance +schedule for the whole cluster. + +Create the following `cmc.yaml` manifest allowing maintenance on Monday, Wednesday +and Friday between 02:00 and 03:00 UTC. + +(!docs/pages/includes/cluster-maintenance-config-spec.mdx!) + +Finally, apply the manifest using `tctl`: + +```code +$ tctl create cmc.yaml +maintenance window has been updated +``` + +## Next steps + +At this point, the cluster is ready for agent automatic updates. +Agents configured to automatically update will fetch their version from the +version server. By changing the target version served by the version server +you can upgrade or downgrade the agents. + +You can now [enroll agents into automatic updates](./enroll-agent-into-automatic-updates.mdx). diff --git a/docs/pages/reference/resources.mdx b/docs/pages/reference/resources.mdx index 7fa009bbdf959..692ba1f7ba1c9 100644 --- a/docs/pages/reference/resources.mdx +++ b/docs/pages/reference/resources.mdx @@ -216,3 +216,9 @@ Device contains information identifying a trusted device. Global configuration options for the Web UI served by the Proxy Service. 
This resource is not set by default, which means a `tctl get ui` will result in an error if used before this resource has been set. (!docs/pages/includes/ui-config-spec.mdx!) + +### Cluster Maintenance Config + +Global configuration options for the agents enrolled into automatic updates. + +(!docs/pages/includes/cluster-maintenance-config-spec.mdx!)