From c38e41ad79aaa51dd0dd75d70b128ea3b0808d49 Mon Sep 17 00:00:00 2001 From: Jeff Hagadorn Date: Sun, 21 Sep 2025 15:46:43 -0700 Subject: [PATCH 01/11] Update the helm chart to be more robust. --- deploy/helm/omni/Chart.yaml | 4 +- deploy/helm/omni/README.md | 632 ++++++++++++++++++ deploy/helm/omni/templates/deployment.yaml | 57 +- deploy/helm/omni/templates/ingress.yaml | 233 +++++++ .../omni/templates/poddisruptionbudget.yaml | 20 + deploy/helm/omni/templates/service.yaml | 30 +- deploy/helm/omni/templates/statefulset.yaml | 136 ++++ deploy/helm/omni/values.yaml | 87 ++- 8 files changed, 1194 insertions(+), 5 deletions(-) create mode 100644 deploy/helm/omni/README.md create mode 100644 deploy/helm/omni/templates/ingress.yaml create mode 100644 deploy/helm/omni/templates/poddisruptionbudget.yaml create mode 100644 deploy/helm/omni/templates/statefulset.yaml diff --git a/deploy/helm/omni/Chart.yaml b/deploy/helm/omni/Chart.yaml index f63a1a767..0fbd7899b 100644 --- a/deploy/helm/omni/Chart.yaml +++ b/deploy/helm/omni/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: omni description: A helm chart to deploy Omni on a Kubernetes cluster type: application -version: 0.0.3 -appVersion: "v1.2.0-beta.0" +version: 1.0.0 +appVersion: "v1.1.4" diff --git a/deploy/helm/omni/README.md b/deploy/helm/omni/README.md new file mode 100644 index 000000000..b1c41b405 --- /dev/null +++ b/deploy/helm/omni/README.md @@ -0,0 +1,632 @@ +# Omni Helm Chart + +A Helm chart for deploying Sidero Omni on Kubernetes clusters. + +## Overview + +Omni is a SaaS-native Talos Linux cluster fleet management platform that provides centralized management, monitoring, and orchestration capabilities for Talos Linux clusters. This Helm chart deploys Omni as a containerized application on Kubernetes with support for both embedded and external etcd configurations, horizontal scaling (when using external etcd), and comprehensive ingress management. 
+ +## Table of Contents + +- [Prerequisites](#prerequisites) +- [Installation](#installation) + - [Add Helm Repository](#add-helm-repository) + - [Install Chart](#install-chart) +- [Configuration](#configuration) + - [Required Configuration](#required-configuration) + - [Authentication Configuration](#authentication-configuration) + - [Storage Configuration](#storage-configuration) + - [Security Configuration](#security-configuration) +- [Values Reference](#values-reference) + - [Global Configuration](#global-configuration) + - [Deployment Configuration](#deployment-configuration) + - [Service Configuration](#service-configuration) + - [Authentication Configuration](#authentication-configuration-1) + - [Resource Configuration](#resource-configuration) + - [Volume Configuration](#volume-configuration) + - [External etcd Configuration](#external-etcd-configuration) + - [Ingress Configuration](#ingress-configuration) + - [Pod Disruption Budget](#pod-disruption-budget) + - [Per-Service Annotations](#per-service-annotations) + - [Advanced Configuration](#advanced-configuration) +- [Architecture Decisions](#architecture-decisions) + - [Deployment vs StatefulSet](#deployment-vs-statefulset) + - [WireGuard Address Resolution](#wireguard-address-resolution) + - [Service Architecture](#service-architecture) +- [Port Configuration](#port-configuration) +- [Security Considerations](#security-considerations) + - [Required Capabilities](#required-capabilities) + - [Device Plugin Requirements](#device-plugin-requirements) + - [Network Policies](#network-policies) +- [Troubleshooting](#troubleshooting) + - [Common Issues](#common-issues) + - [Logs](#logs) + - [Debug Mode](#debug-mode) +- [Upgrading](#upgrading) + - [Backup](#backup) + - [Upgrade Process](#upgrade-process) +- [Uninstalling](#uninstalling) +- [Contributing](#contributing) +- [License](#license) + +## Prerequisites + +- Kubernetes 1.19+ (1.21+ when `podDisruptionBudget.enabled` is set, because the PodDisruptionBudget uses the `policy/v1` API) +- Helm 3.2.0+ +- PersistentVolume provisioner support in the underlying 
infrastructure +- Device plugin support for `/dev/net/tun` (required for WireGuard functionality) + +## Installation + +### Add Helm Repository + +```bash +# Add the repository (if available) +helm repo add sidero https://charts.sidero.dev +helm repo update +``` + +### Install Chart + +```bash +helm install omni sidero/omni \ + --namespace omni-system \ + --create-namespace \ + --set domainName=omni.example.com \ + --set accountUuid=your-account-uuid \ + --set auth.auth0.clientId=your-auth0-client-id \ + --set auth.auth0.domain=https://your-auth0-domain +``` + +## Configuration + +### Required Configuration + +The following values must be configured before deployment: + +| Parameter | Description | Required | +|-----------|-------------|----------| +| `domainName` | Primary domain name for Omni API access | Yes | +| `accountUuid` | Unique account identifier | Yes | +| `auth.auth0.clientId` | Auth0 client ID (if using Auth0) | Conditional | +| `auth.auth0.domain` | Auth0 domain (if using Auth0) | Conditional | + +### Authentication Configuration + +Omni supports two authentication methods: + +#### Auth0 Authentication + +```yaml +auth: + auth0: + enabled: true + clientId: "your-auth0-client-id" + domain: "https://your-auth0-domain" +``` + +#### SAML Authentication + +```yaml +auth: + saml: + enabled: true + url: "https://your-saml-provider" +``` + +### Storage Configuration + +#### Embedded etcd (Default) + +**New Deployments**: When using embedded etcd (`etcd.external: false`), Omni is deployed as a StatefulSet with automatic PVC provisioning: + +```yaml +etcd: + external: false +volumes: + etcd: + size: "50Gi" + storageClass: "fast-ssd" # optional +``` + +**Existing Deployments**: Continue using Deployment with manual PVC management: + +```yaml +volumes: + etcd: + persistentVolumeClaimName: omni-pvc # Must exist before deployment +``` + +**Critical Limitation**: Embedded etcd is hardcoded to 1 replica because Omni's embedded etcd does not support clustering. 
The `deployment.replicaCount` setting is ignored when using embedded etcd. Attempting to scale beyond 1 replica would result in data corruption and split-brain scenarios. + +**When to use embedded etcd**: +- Single-instance deployments +- Development and testing environments +- Small-scale production deployments where high availability is provided at the infrastructure level + +**When to use external etcd**: +- Multi-replica deployments for high availability +- Large-scale production environments +- When you need horizontal scaling capabilities + +#### External etcd + +When using external etcd (`etcd.external: true`), Omni is deployed as a Deployment without persistent storage: + +```yaml +etcd: + external: true + endpoints: + - "https://etcd-1.example.com:2379" + - "https://etcd-2.example.com:2379" + - "https://etcd-3.example.com:2379" +``` + +This configuration enables horizontal scaling with `deployment.replicaCount > 1`. + +### Security Configuration + +#### GPG Key Configuration + +Omni requires a GPG private key for signing operations: + +```yaml +privateKeySource: "file:///omni.asc" +volumes: + gpg: + secretName: gpg-secret +``` + +Create the secret: +```bash +kubectl create secret generic gpg-secret \ + --from-file=omni.asc=/path/to/your/private.key \ + --namespace omni-system +``` + +#### TLS Configuration + +For production deployments, configure TLS certificates: + +```yaml +volumeMounts: + tls: + mountPath: "/etc/omni/tls" + readOnly: true +volumes: + tls: + secretName: tls-secret +``` + +Create the TLS secret: +```bash +kubectl create secret tls tls-secret \ + --cert=/path/to/tls.crt \ + --key=/path/to/tls.key \ + --namespace omni-system +``` + +## Values Reference + +### Global Configuration + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `nameOverride` | Override the chart name | `""` | +| `domainName` | Primary domain name for Omni | `omni.example.com` | +| `accountUuid` | Account UUID | `""` | +| `name` | Instance 
name | `"My Omni instance"` | +| `privateKeySource` | GPG private key source path | `"file:///omni.asc"` | +| `initialUsers` | List of initial user emails | `[]` | +| `includeGenericDevicePlugin` | Include generic device plugin | `true` | + +### Deployment Configuration + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `deployment.image` | Container image repository | `ghcr.io/siderolabs/omni` | +| `deployment.tag` | Container image tag | `"latest"` | +| `deployment.replicaCount` | Number of replicas | `1` | +| `deployment.imagePullPolicy` | Image pull policy | `IfNotPresent` | +| `deployment.annotations` | Deployment annotations | `{}` | + +### Service Configuration + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `service.type` | Kubernetes service type | `ClusterIP` | +| `service.siderolink.domainName` | Siderolink API domain | `omni.siderolink.example.com` | +| `service.siderolink.wireguard.address` | WireGuard advertised address (optional) | `""` | +| `service.siderolink.wireguard.port` | WireGuard service port | `30180` | +| `service.siderolink.wireguard.type` | WireGuard service type | `NodePort` | +| `service.siderolink.wireguard.externalTrafficPolicy` | Traffic policy for NodePort/LoadBalancer | `Cluster` | +| `service.k8sProxy.domainName` | Kubernetes proxy domain | `omni.kubernetes.example.com` | + +#### WireGuard Service Configuration + +The WireGuard service supports flexible addressing: + +**Automatic DNS Resolution** (default): +```yaml +service: + siderolink: + wireguard: + address: "" # Uses wireguard.namespace.svc.cluster.local +``` + +**Explicit Address**: +```yaml +service: + siderolink: + wireguard: + address: "192.168.1.100" # External IP or FQDN +``` + +**Load Balancer Configuration**: +```yaml +service: + siderolink: + wireguard: + type: LoadBalancer + externalTrafficPolicy: Local # Preserves client IP +``` + +### Authentication Configuration + +| Parameter | Description | 
Default | +|-----------|-------------|---------| +| `auth.auth0.enabled` | Enable Auth0 authentication | `true` | +| `auth.auth0.clientId` | Auth0 client ID | `"123456"` | +| `auth.auth0.domain` | Auth0 domain | `"https://www.auth0.example"` | +| `auth.saml.enabled` | Enable SAML authentication | `false` | +| `auth.saml.url` | SAML provider URL | `"https://www.saml.example"` | + +### Resource Configuration + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `resources.requests.cpu` | CPU request | `100m` | +| `resources.requests.memory` | Memory request | `128Mi` | +| `resources.limits.cpu` | CPU limit | `200m` | +| `resources.limits.memory` | Memory limit | `256Mi` | +| `resources.limits["squat.ai/tun"]` | TUN device limit | `1` | + +### Volume Configuration + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `volumes.etcd.persistentVolumeClaimName` | etcd PVC name (existing deployments) | `omni-pvc` | +| `volumes.etcd.size` | etcd storage size (StatefulSet only) | `"50Gi"` | +| `volumes.etcd.storageClass` | Storage class for etcd PVC (optional) | `""` | +| `volumes.tls.secretName` | TLS secret name | `null` | +| `volumes.gpg.secretName` | GPG secret name | `gpg` | +| `volumeMounts.tls.mountPath` | TLS mount path | `null` | +| `volumeMounts.omniAsc.mountPath` | GPG key mount path | `"/omni.asc"` | + +### External etcd Configuration + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `etcd.external` | Use external etcd cluster | `false` | +| `etcd.endpoints` | etcd cluster endpoints | `[]` | +| `etcd.username` | etcd username (direct) | `""` | +| `etcd.password` | etcd password (direct) | `""` | +| `etcd.auth.secretName` | Secret containing etcd credentials | `""` | +| `etcd.tls.enabled` | Enable TLS for etcd | `false` | +| `etcd.tls.secretName` | Secret containing TLS certificates | `""` | + +#### etcd Authentication + +**Direct credentials**: +```yaml +etcd: + username: 
"omni-user" + password: "secure-password" +``` + +**Secret-based credentials**: +```yaml +etcd: + auth: + secretName: "etcd-auth" + usernameKey: "username" # optional, defaults to "username" + passwordKey: "password" # optional, defaults to "password" +``` + +#### etcd TLS Configuration + +**File paths**: +```yaml +etcd: + tls: + enabled: true + certFile: "/etc/etcd/tls/client.crt" + keyFile: "/etc/etcd/tls/client.key" + caFile: "/etc/etcd/tls/ca.crt" +``` + +**Secret-based certificates**: +```yaml +etcd: + tls: + enabled: true + secretName: "etcd-tls" + certKey: "client.crt" # optional, defaults to "client.crt" + keyKey: "client.key" # optional, defaults to "client.key" + caKey: "ca.crt" # optional, defaults to "ca.crt" +``` + +### Ingress Configuration + +The chart supports four types of ingress resources: + +| Ingress Type | Purpose | Default Host | +|--------------|---------|-------------| +| `api` | gRPC API endpoints | `omni.example.com` | +| `ui` | Web interface | `omni.example.com` | +| `siderolink` | Siderolink gRPC API | `siderolink.omni.example.com` | +| `kubernetesProxy` | Kubernetes API proxy | `kubernetes.omni.example.com` | + +#### Basic Ingress Configuration + +```yaml +ingress: + api: + enabled: true + host: omni.example.com + ingressClassName: nginx + tls: + enabled: true + secretName: omni-api-tls +``` + +#### Cert-Manager Integration + +```yaml +ingress: + api: + enabled: true + certManager: + enabled: true + issuer: letsencrypt-prod +``` + +#### Kubernetes Proxy Wildcard + +The Kubernetes proxy ingress automatically creates a wildcard rule (`*.kubernetes.omni.example.com`) to support tools like ArgoCD that require unique hostnames per cluster. 
+ +### Pod Disruption Budget + +```yaml +podDisruptionBudget: + enabled: true + minAvailable: 1 # or use maxUnavailable +``` + +### Per-Service Annotations + +Supports both global and per-service annotations: + +```yaml +service: + annotations: + example.com/global: "value" # Applied to all services + internal: + annotations: + example.com/internal-only: "value" + siderolink: + wireguard: + annotations: + example.com/wireguard-only: "value" +``` + +### Advanced Configuration + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `extraArgs` | Additional container arguments | `[]` | +| `customVolumes` | Additional volumes | `[]` | +| `customVolumeMounts` | Additional volume mounts | `[]` | + +## Backwards Compatibility + +The chart maintains full backwards compatibility with existing deployments: + +**Existing Deployments**: +- Charts deployed with previous versions continue using Deployment resources +- Storage configuration remains unchanged (manual PVC management) +- No disruption during upgrades +- `etcd.external` setting is ignored for existing deployments + +**New Deployments**: +- `etcd.external: false` (default) → StatefulSet with automatic PVC provisioning +- `etcd.external: true` → Deployment for external etcd clusters + +**Detection Logic**: +The chart uses Helm's `lookup` function to detect existing Deployment resources and automatically maintains compatibility. + +## Architecture Decisions + +### Deployment vs StatefulSet + +The chart automatically chooses the appropriate Kubernetes resource based on deployment history and etcd configuration: + +**Resource Selection Logic**: +1. **Existing Deployment detected** → Continue using Deployment (backwards compatibility) +2. **Existing StatefulSet detected** → Continue using StatefulSet (backwards compatibility) +3. **New deployment + `etcd.external: false`** → Use StatefulSet with embedded etcd +4. **New deployment + `etcd.external: true`** → Use Deployment with external etcd +5. 
**Resource type changes** → Only occur when switching etcd modes and no existing resource conflicts + +**StatefulSet Benefits** (new deployments only): +- Automatic PVC provisioning per replica +- Stable network identities +- Ordered deployment and scaling +- Limited to 1 replica (embedded etcd constraint) + +**Deployment Benefits**: +- Backwards compatibility with existing installations +- Horizontal scaling when using external etcd +- Simpler storage management for external etcd scenarios + +### WireGuard Address Resolution + +The WireGuard service supports both internal gRPC tunneling and external VPN connectivity: + +- **Internal**: Uses Kubernetes DNS (`wireguard.namespace.svc.cluster.local`) for cluster-internal communication +- **External**: Allows explicit IP/FQDN configuration for external client connectivity + +### Service Architecture + +The chart deploys three Kubernetes services: + +1. **internal**: Main Omni API service (ports 8080, 8090, 8095) +2. **internal-grpc**: gRPC service for internal communication (ports 8080, 8090) +3. **wireguard**: WireGuard VPN service (configurable type and port) + +### Port Configuration + +| Service | Port | Protocol | Description | +|---------|------|----------|-------------| +| omni | 8080 | TCP | Main API endpoint | +| siderolink | 8090 | TCP | Siderolink API | +| k8s-proxy | 8095 | TCP | Kubernetes proxy | +| wireguard | 30180 | UDP | WireGuard VPN | + +## Security Considerations + +### Required Capabilities + +The Omni container requires the `NET_ADMIN` capability for WireGuard functionality: + +```yaml +securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + add: + - NET_ADMIN +``` + +### Device Plugin Requirements + +WireGuard functionality requires access to `/dev/net/tun`. 
Ensure your cluster has the appropriate device plugin configured: + +```yaml +resources: + limits: + squat.ai/tun: 1 +``` + +### Network Policies + +Consider implementing network policies to restrict traffic to Omni services based on your security requirements. + +## Troubleshooting + +### Common Issues + +#### Pod Fails to Start + +1. **Missing GPG Secret**: Ensure the GPG secret exists and contains the private key +2. **Storage Issues** (embedded etcd): Verify storage class and PVC provisioning +3. **etcd Connection** (external etcd): Check endpoints, credentials, and TLS configuration +4. **Device Plugin**: Confirm the TUN device plugin is available on nodes + +#### Scaling Issues + +1. **StatefulSet Scaling**: Cannot scale beyond 1 replica with embedded etcd - this is enforced by the chart +2. **Replica Count Ignored**: `deployment.replicaCount > 1` is ignored when `etcd.external: false` +3. **External etcd Required**: Use `etcd.external: true` for multiple replicas +4. **Data Corruption Risk**: Never attempt to manually scale the StatefulSet beyond 1 replica + +#### Service Connectivity + +1. **WireGuard Address**: Verify address resolution (internal DNS vs external IP) +2. **Ingress Configuration**: Check ingress class and TLS certificate availability + +#### Authentication Issues + +1. **Auth0 Configuration**: Verify client ID and domain are correct +2. **SAML Configuration**: Ensure SAML metadata is properly configured + +#### Network Connectivity + +1. **Service Discovery**: Verify DNS resolution within the cluster +2. 
**WireGuard**: Check NodePort accessibility and firewall rules + +### Logs + +View Omni logs (for StatefulSet-based installations, use `statefulset/omni` instead of `deployment/omni`): +```bash +kubectl logs -n omni-system deployment/omni +``` + +### Debug Mode + +Enable debug logging: +```yaml +extraArgs: + - --debug +``` + +## Upgrading + +### Backwards Compatibility + +Upgrading from previous chart versions is fully supported: + +- **Existing Deployments**: Continue using the same Deployment resource and storage configuration +- **No Resource Changes**: The chart automatically detects existing deployments and maintains compatibility +- **Configuration Preserved**: All existing values and storage remain unchanged + +### Backup + +Before upgrading, back up the etcd data: + +**For Deployment-based installations**: +```bash +kubectl exec -n omni-system deployment/omni -- tar -czf /tmp/etcd-backup.tar.gz /_out +kubectl cp omni-system/$(kubectl get pod -n omni-system -l app.kubernetes.io/name=omni -o jsonpath='{.items[0].metadata.name}'):/tmp/etcd-backup.tar.gz ./etcd-backup.tar.gz +``` + +**For StatefulSet-based installations**: +```bash +kubectl exec -n omni-system statefulset/omni -- tar -czf /tmp/etcd-backup.tar.gz /_out +kubectl cp omni-system/omni-0:/tmp/etcd-backup.tar.gz ./etcd-backup.tar.gz +``` + +### Upgrade Process + +```bash +helm upgrade omni sidero/omni \ + --namespace omni-system \ + --reuse-values +``` + +## Uninstalling + +```bash +helm uninstall omni --namespace omni-system +``` + +Note: This will not delete PVCs. Remove them manually if needed: + +**For Deployment-based installations**: +```bash +kubectl delete pvc omni-pvc --namespace omni-system +``` + +**For StatefulSet-based installations**: +```bash +kubectl delete pvc etcd-data-omni-0 --namespace omni-system +``` + +## Contributing + +For issues and contributions, please refer to the [Sidero Labs GitHub repository](https://github.com/siderolabs/omni). + +## License + +This chart is licensed under the Mozilla Public License 2.0. 
See the [LICENSE](https://github.com/siderolabs/omni/blob/main/LICENSE) file for details. \ No newline at end of file diff --git a/deploy/helm/omni/templates/deployment.yaml b/deploy/helm/omni/templates/deployment.yaml index 9418e8d2f..2785b0ddc 100644 --- a/deploy/helm/omni/templates/deployment.yaml +++ b/deploy/helm/omni/templates/deployment.yaml @@ -1,3 +1,6 @@ +{{- $existingDeployment := lookup "apps/v1" "Deployment" .Release.Namespace (include "omni.name" .) }} +{{- $existingStatefulSet := lookup "apps/v1" "StatefulSet" .Release.Namespace (include "omni.name" .) }} +{{- if or $existingDeployment (and .Values.etcd.external (not $existingStatefulSet)) }} --- apiVersion: apps/v1 kind: Deployment @@ -59,6 +62,19 @@ spec: - ALL add: - NET_ADMIN + env: + {{- if and .Values.etcd.external .Values.etcd.auth .Values.etcd.auth.secretName }} + - name: ETCD_USERNAME + valueFrom: + secretKeyRef: + name: {{ .Values.etcd.auth.secretName }} + key: {{ .Values.etcd.auth.usernameKey | default "username" }} + - name: ETCD_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.etcd.auth.secretName }} + key: {{ .Values.etcd.auth.passwordKey | default "password" }} + {{- end }} volumeMounts: {{- if .Values.volumeMounts.tls.mountPath }} - name: tls @@ -71,6 +87,11 @@ spec: readOnly: {{ .Values.volumeMounts.omniAsc.readOnly }} - name: etcd mountPath: /_out + {{- if and .Values.etcd.external .Values.etcd.tls .Values.etcd.tls.secretName }} + - name: etcd-tls + mountPath: /etc/etcd/tls + readOnly: true + {{- end }} {{- if .Values.customVolumeMounts }} {{- with .Values.customVolumeMounts }} {{- toYaml . 
| nindent 10 }} @@ -95,14 +116,42 @@ spec: - --cert=/etc/omni/tls/tls.crt - --key=/etc/omni/tls/tls.key {{- end }} + {{- if .Values.etcd.external }} + - --etcd-endpoints={{ join "," .Values.etcd.endpoints }} + {{- if .Values.etcd.username }} + - --etcd-username={{ .Values.etcd.username }} + {{- else if and .Values.etcd.auth .Values.etcd.auth.secretName }} + - --etcd-username=$(ETCD_USERNAME) + {{- end }} + {{- if .Values.etcd.password }} + - --etcd-password={{ .Values.etcd.password }} + {{- else if and .Values.etcd.auth .Values.etcd.auth.secretName }} + - --etcd-password=$(ETCD_PASSWORD) + {{- end }} + {{- if and .Values.etcd.tls .Values.etcd.tls.enabled }} + {{- if .Values.etcd.tls.secretName }} + - --etcd-tls-cert-file=/etc/etcd/tls/{{ .Values.etcd.tls.certKey | default "client.crt" }} + - --etcd-tls-key-file=/etc/etcd/tls/{{ .Values.etcd.tls.keyKey | default "client.key" }} + - --etcd-tls-ca-file=/etc/etcd/tls/{{ .Values.etcd.tls.caKey | default "ca.crt" }} + {{- else }} + - --etcd-tls-cert-file={{ .Values.etcd.tls.certFile }} + - --etcd-tls-key-file={{ .Values.etcd.tls.keyFile }} + - --etcd-tls-ca-file={{ .Values.etcd.tls.caFile }} + {{- end }} + {{- if .Values.etcd.tls.insecureSkipVerify }} + - --etcd-tls-insecure-skip-verify + {{- end }} + {{- end }} + {{- else }} - --etcd-embedded=true + {{- end }} {{- if and .Values.initialUsers (gt (len .Values.initialUsers) 0) }} - --initial-users={{ join "," .Values.initialUsers }} {{- end }} - --name={{ .Values.name}} - --private-key-source={{ .Values.privateKeySource }} - --siderolink-api-advertised-url={{ printf "https://%s" .Values.service.siderolink.domainName }} - - --siderolink-wireguard-advertised-addr={{ .Values.service.siderolink.wireguard.address }}:{{ .Values.service.siderolink.wireguard.port }} + - --siderolink-wireguard-advertised-addr={{ if .Values.service.siderolink.wireguard.address }}{{ .Values.service.siderolink.wireguard.address }}{{ else }}{{ printf "%s.%s.svc.cluster.local" "wireguard" 
.Release.Namespace }}{{ end }}:{{ .Values.service.siderolink.wireguard.port }} {{- range $value := .Values.extraArgs }} - {{ $value }} {{- end }} @@ -118,8 +167,14 @@ spec: - name: etcd persistentVolumeClaim: claimName: {{ .Values.volumes.etcd.persistentVolumeClaimName }} + {{- if and .Values.etcd.external .Values.etcd.tls .Values.etcd.tls.secretName }} + - name: etcd-tls + secret: + secretName: {{ .Values.etcd.tls.secretName }} + {{- end }} {{- if .Values.customVolumes }} {{- with .Values.customVolumes }} {{- toYaml . | nindent 8 }} {{- end }} {{- end }} +{{- end }} diff --git a/deploy/helm/omni/templates/ingress.yaml b/deploy/helm/omni/templates/ingress.yaml new file mode 100644 index 000000000..b44dce545 --- /dev/null +++ b/deploy/helm/omni/templates/ingress.yaml @@ -0,0 +1,233 @@ +{{- if .Values.ingress.api.enabled }} +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "omni.name" . }}-api + namespace: {{ .Release.Namespace }} + labels: + {{- include "omni.labels" . | nindent 4 }} + annotations: + nginx.ingress.kubernetes.io/backend-protocol: GRPC + nginx.ingress.kubernetes.io/proxy-body-size: 32m + nginx.ingress.kubernetes.io/service-upstream: "true" + {{- if .Values.ingress.api.certManager.enabled }} + cert-manager.io/cluster-issuer: {{ .Values.ingress.api.certManager.issuer }} + {{- end }} + {{- with .Values.ingress.api.annotations }} + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + {{- if .Values.ingress.api.ingressClassName }} + ingressClassName: {{ .Values.ingress.api.ingressClassName }} + {{- end }} + rules: + - host: {{ .Values.ingress.api.host }} + http: + paths: + - backend: + service: + name: internal-grpc + port: + number: 8080 + path: /cosi.resource.State + pathType: ImplementationSpecific + - backend: + service: + name: internal-grpc + port: + number: 8080 + path: /management.ManagementService + pathType: ImplementationSpecific + - backend: + service: + name: internal-grpc + port: + number: 8080 + path: /machine.MachineService + pathType: ImplementationSpecific + - backend: + service: + name: internal-grpc + port: + number: 8080 + path: /cluster.ClusterService + pathType: ImplementationSpecific + - backend: + service: + name: internal-grpc + port: + number: 8080 + path: /inspect.InspectService + pathType: ImplementationSpecific + - backend: + service: + name: internal-grpc + port: + number: 8080 + path: /resource.ResourceService + pathType: ImplementationSpecific + - backend: + service: + name: internal-grpc + port: + number: 8080 + path: /storage.StorageService + pathType: ImplementationSpecific + - backend: + service: + name: internal-grpc + port: + number: 8080 + path: /time.TimeService + pathType: ImplementationSpecific + - backend: + service: + name: internal-grpc + port: + number: 8080 + path: /auth.AuthService + pathType: ImplementationSpecific + - backend: + service: + name: internal-grpc + port: + number: 8080 + path: /oidc. # OIDC gRPC services (package prefix "oidc.") + pathType: ImplementationSpecific + {{- if .Values.ingress.api.tls.enabled }} + tls: + - hosts: + - {{ .Values.ingress.api.host }} + secretName: {{ .Values.ingress.api.tls.secretName }} + {{- end }} +{{- end }} + +{{- if .Values.ingress.ui.enabled }} +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "omni.name" . }}-ui + namespace: {{ .Release.Namespace }} + labels: + {{- include "omni.labels" . 
| nindent 4 }} + annotations: + {{- if .Values.ingress.ui.certManager.enabled }} + cert-manager.io/cluster-issuer: {{ .Values.ingress.ui.certManager.issuer }} + {{- end }} + {{- with .Values.ingress.ui.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if .Values.ingress.ui.ingressClassName }} + ingressClassName: {{ .Values.ingress.ui.ingressClassName }} + {{- end }} + rules: + - host: {{ .Values.ingress.ui.host }} + http: + paths: + - backend: + service: + name: internal + port: + number: 8080 + path: / + pathType: Prefix + {{- if .Values.ingress.ui.tls.enabled }} + tls: + - hosts: + - {{ .Values.ingress.ui.host }} + secretName: {{ .Values.ingress.ui.tls.secretName }} + {{- end }} +{{- end }} + +{{- if .Values.ingress.siderolink.enabled }} +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "omni.name" . }}-siderolink + namespace: {{ .Release.Namespace }} + labels: + {{- include "omni.labels" . | nindent 4 }} + annotations: + nginx.ingress.kubernetes.io/backend-protocol: GRPC + {{- if .Values.ingress.siderolink.certManager.enabled }} + cert-manager.io/cluster-issuer: {{ .Values.ingress.siderolink.certManager.issuer }} + {{- end }} + {{- with .Values.ingress.siderolink.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if .Values.ingress.siderolink.ingressClassName }} + ingressClassName: {{ .Values.ingress.siderolink.ingressClassName }} + {{- end }} + rules: + - host: {{ .Values.ingress.siderolink.host }} + http: + paths: + - backend: + service: + name: internal-grpc + port: + number: 8090 + path: / + pathType: ImplementationSpecific + {{- if .Values.ingress.siderolink.tls.enabled }} + tls: + - hosts: + - {{ .Values.ingress.siderolink.host }} + secretName: {{ .Values.ingress.siderolink.tls.secretName }} + {{- end }} +{{- end }} + +{{- if .Values.ingress.kubernetesProxy.enabled }} +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "omni.name" . 
}}-kubernetes-proxy + namespace: {{ .Release.Namespace }} + labels: + {{- include "omni.labels" . | nindent 4 }} + annotations: + {{- if .Values.ingress.kubernetesProxy.certManager.enabled }} + cert-manager.io/cluster-issuer: {{ .Values.ingress.kubernetesProxy.certManager.issuer }} + {{- end }} + {{- with .Values.ingress.kubernetesProxy.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if .Values.ingress.kubernetesProxy.ingressClassName }} + ingressClassName: {{ .Values.ingress.kubernetesProxy.ingressClassName }} + {{- end }} + rules: + - host: {{ .Values.ingress.kubernetesProxy.host }} + http: + paths: + - backend: + service: + name: internal + port: + number: 8095 + path: / + pathType: ImplementationSpecific + - host: {{ printf "*.%s" .Values.ingress.kubernetesProxy.host }} + http: + paths: + - backend: + service: + name: internal + port: + number: 8095 + path: / + pathType: ImplementationSpecific + {{- if .Values.ingress.kubernetesProxy.tls.enabled }} + tls: + - hosts: + - {{ .Values.ingress.kubernetesProxy.host }} + - {{ printf "*.%s" .Values.ingress.kubernetesProxy.host }} + secretName: {{ .Values.ingress.kubernetesProxy.tls.secretName }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/deploy/helm/omni/templates/poddisruptionbudget.yaml b/deploy/helm/omni/templates/poddisruptionbudget.yaml new file mode 100644 index 000000000..3e7256f36 --- /dev/null +++ b/deploy/helm/omni/templates/poddisruptionbudget.yaml @@ -0,0 +1,20 @@ +{{- if .Values.podDisruptionBudget.enabled }} +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ include "omni.name" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "omni.labels" . 
| nindent 4 }} +spec: + {{- /* A PDB accepts only one of minAvailable/maxUnavailable; maxUnavailable takes precedence when both are set. */}} + {{- if .Values.podDisruptionBudget.maxUnavailable }} + maxUnavailable: {{ .Values.podDisruptionBudget.maxUnavailable }} + {{- else if .Values.podDisruptionBudget.minAvailable }} + minAvailable: {{ .Values.podDisruptionBudget.minAvailable }} + {{- end }} + selector: + matchLabels: + {{- include "omni.selectorLabels" . | nindent 6 }} +{{- end }} \ No newline at end of file diff --git a/deploy/helm/omni/templates/service.yaml b/deploy/helm/omni/templates/service.yaml index e4e513758..05570bc4f 100644 --- a/deploy/helm/omni/templates/service.yaml +++ b/deploy/helm/omni/templates/service.yaml @@ -5,9 +5,16 @@ metadata: name: internal namespace: {{ .Release.Namespace }} annotations: + {{- if .Values.service.annotations }} {{- range $key, $value := .Values.service.annotations }} {{ $key }}: {{ tpl ($value | toString) $ | quote }} {{- end }} + {{- end }} + {{- if and .Values.service.internal .Values.service.internal.annotations }} + {{- range $key, $value := .Values.service.internal.annotations }} + {{ $key }}: {{ tpl ($value | toString) $ | quote }} + {{- end }} + {{- end }} labels: {{- include "omni.labels" . | nindent 4 }} {{- if .Values.service.labels }} @@ -15,7 +22,9 @@ metadata: {{- end }} spec: type: {{ .Values.service.type }} + {{- if .Values.service.clusterIP }} clusterIP: {{ .Values.service.clusterIP }} + {{- end }} ports: - name: omni port: 8080 @@ -38,9 +47,16 @@ metadata: name: internal-grpc namespace: {{ .Release.Namespace }} annotations: + {{- if .Values.service.annotations }} {{- range $key, $value := .Values.service.annotations }} {{ $key }}: {{ tpl ($value | toString) $ | quote }} {{- end }} + {{- end }} + {{- if and .Values.service.internalGrpc .Values.service.internalGrpc.annotations }} + {{- range $key, $value := .Values.service.internalGrpc.annotations }} + {{ $key }}: {{ tpl ($value | toString) $ | quote }} + {{- end }} + {{- end }} labels: {{- include "omni.labels" . 
| nindent 4 }} {{- if .Values.service.labels }} @@ -48,7 +64,9 @@ metadata: {{- end }} spec: type: {{ .Values.service.type }} + {{- if .Values.service.clusterIP }} clusterIP: {{ .Values.service.clusterIP }} + {{- end }} ports: - name: omni port: 8080 @@ -67,17 +85,27 @@ metadata: name: wireguard namespace: {{ .Release.Namespace }} annotations: + {{- if .Values.service.annotations }} {{- range $key, $value := .Values.service.annotations }} {{ $key }}: {{ tpl ($value | toString) $ | quote }} {{- end }} + {{- end }} + {{- if and .Values.service.siderolink.wireguard .Values.service.siderolink.wireguard.annotations }} + {{- range $key, $value := .Values.service.siderolink.wireguard.annotations }} + {{ $key }}: {{ tpl ($value | toString) $ | quote }} + {{- end }} + {{- end }} labels: {{- include "omni.labels" . | nindent 4 }} {{- if .Values.service.labels }} {{- toYaml .Values.service.labels | nindent 4 }} {{- end }} spec: - type: NodePort + type: {{ .Values.service.siderolink.wireguard.type | default "NodePort" }} + externalTrafficPolicy: {{ .Values.service.siderolink.wireguard.externalTrafficPolicy | default "Cluster" }} + {{- if .Values.service.clusterIP }} clusterIP: {{ .Values.service.clusterIP }} + {{- end }} ports: - name: wireguard nodePort: {{ .Values.service.siderolink.wireguard.port }} diff --git a/deploy/helm/omni/templates/statefulset.yaml b/deploy/helm/omni/templates/statefulset.yaml new file mode 100644 index 000000000..6ee351972 --- /dev/null +++ b/deploy/helm/omni/templates/statefulset.yaml @@ -0,0 +1,136 @@ +{{- $existingDeployment := lookup "apps/v1" "Deployment" .Release.Namespace (include "omni.name" .) }} +{{- $existingStatefulSet := lookup "apps/v1" "StatefulSet" .Release.Namespace (include "omni.name" .) }} +{{- if or $existingStatefulSet (and (not .Values.etcd.external) (not $existingDeployment)) }} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ include "omni.name" . 
}} + namespace: {{ .Release.Namespace }} + labels: + {{- include "omni.labels" . | nindent 4 }} + app.kubernetes.io/component: omni + {{- with .Values.deployment.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- toYaml .Values.deployment.annotations | nindent 4 }} +spec: + serviceName: {{ include "omni.name" . }} + replicas: 1 # Embedded etcd only supports single instance + selector: + matchLabels: {{- include "omni.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.deployment.podAnnotations }}{{/* pod-level annotations: render the value that is tested, nested one level below template.metadata.annotations */}} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "omni.labels" . | nindent 8 }} + app.kubernetes.io/component: omni + {{- with .Values.deployment.labels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + containers: + - name: omni + image: {{ .Values.deployment.image }}:{{ .Values.deployment.tag | default .Chart.AppVersion }} + imagePullPolicy: {{ .Values.deployment.imagePullPolicy }} + resources: + {{- with .Values.resources }} + {{- toYaml . | nindent 12 }} + {{- end }} + ports: + - name: omni + containerPort: 8080 + protocol: TCP + - name: siderolink + containerPort: 8090 + protocol: TCP + - name: k8s-proxy + containerPort: 8095 + protocol: TCP + - name: wireguard + containerPort: 50180 + protocol: UDP + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + add: + - NET_ADMIN + volumeMounts: + {{- if .Values.volumeMounts.tls.mountPath }} + - name: tls + mountPath: {{ .Values.volumeMounts.tls.mountPath }} + readOnly: {{ .Values.volumeMounts.tls.readOnly }} + {{- end }} + - name: omni-asc + mountPath: {{ .Values.volumeMounts.omniAsc.mountPath }} + subPath: {{ .Values.volumeMounts.omniAsc.subPath }} + readOnly: {{ .Values.volumeMounts.omniAsc.readOnly }} + - name: etcd-data + mountPath: /_out + {{- if .Values.customVolumeMounts }} + {{- with .Values.customVolumeMounts }} + {{- toYaml . 
| nindent 10 }} + {{- end }} + {{- end }} + args: + - --account-id="{{ .Values.accountUuid }}" + - --advertised-api-url={{ printf "https://%s/" .Values.domainName }} + - --advertised-kubernetes-proxy-url={{ printf "https://%s/" .Values.service.k8sProxy.domainName }} + {{- if .Values.auth.auth0.enabled }} + - --auth-auth0-enabled=true + - --auth-auth0-client-id={{ .Values.auth.auth0.clientId | toString}} + - --auth-auth0-domain={{ .Values.auth.auth0.domain }} + {{- end }} + {{- if .Values.auth.saml.enabled }} + - --auth-saml-enabled=true + {{- if .Values.auth.saml.url }} + - --auth-saml-url={{ .Values.auth.saml.url }} + {{- end }} + {{- end }} + {{- if .Values.volumes.tls.secretName }} + - --cert=/etc/omni/tls/tls.crt + - --key=/etc/omni/tls/tls.key + {{- end }} + - --etcd-embedded=true + {{- if and .Values.initialUsers (gt (len .Values.initialUsers) 0) }} + - --initial-users={{ join "," .Values.initialUsers }} + {{- end }} + - --name={{ .Values.name}} + - --private-key-source={{ .Values.privateKeySource }} + - --siderolink-api-advertised-url={{ printf "https://%s" .Values.service.siderolink.domainName }} + - --siderolink-wireguard-advertised-addr={{ if .Values.service.siderolink.wireguard.address }}{{ .Values.service.siderolink.wireguard.address }}{{ else }}{{ printf "%s.%s.svc.cluster.local" "wireguard" .Release.Namespace }}{{ end }}:{{ .Values.service.siderolink.wireguard.port }} + {{- range $value := .Values.extraArgs }} + - {{ $value }} + {{- end }} + volumes: + {{- if .Values.volumes.tls.secretName }} + - name: tls + secret: + secretName: {{ .Values.volumes.tls.secretName }} + {{- end }} + - name: omni-asc + secret: + secretName: {{ .Values.volumes.gpg.secretName }} + {{- if .Values.customVolumes }} + {{- with .Values.customVolumes }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + volumeClaimTemplates: + - metadata: + name: etcd-data + spec: + accessModes: [ "ReadWriteOnce" ] + {{- if .Values.volumes.etcd.storageClass }} + storageClassName: {{ .Values.volumes.etcd.storageClass }} + {{- end }} + resources: + requests: + storage: {{ .Values.volumes.etcd.size | default "50Gi" }} +{{- end }} \ No newline at end of file diff --git a/deploy/helm/omni/values.yaml b/deploy/helm/omni/values.yaml index 90387d604..21531d687 100644 --- a/deploy/helm/omni/values.yaml +++ b/deploy/helm/omni/values.yaml @@ -24,8 +24,11 @@ service: siderolink: domainName: omni.siderolink.example.com wireguard: - address: "" # + # Optional: explicit address to advertise (if empty, uses wireguard.namespace.svc.cluster.local) + address: "" port: 30180 + type: NodePort # NodePort, LoadBalancer, or ClusterIP + externalTrafficPolicy: Cluster # Local or Cluster (for NodePort/LoadBalancer) k8sProxy: domainName: omni.kubernetes.example.com resources: @@ -38,6 +41,55 @@ resources: # Required for /dev/net/tun # https://www.talos.dev/v1.8/kubernetes-guides/configuration/device-plugins/ squat.ai/tun: 1 +podDisruptionBudget: + enabled: false + minAvailable: 1 + # maxUnavailable: 1 +ingress: + api: + enabled: false + host: omni.example.com + ingressClassName: nginx + annotations: {} + certManager: + enabled: false + issuer: letsencrypt-prod + tls: + enabled: false + secretName: omni-api-tls + ui: + enabled: false + host: omni.example.com + ingressClassName: nginx + annotations: {} + certManager: + enabled: false + issuer: letsencrypt-prod + tls: + enabled: false + secretName: omni-ui-tls + siderolink: + enabled: false + host: siderolink.omni.example.com + ingressClassName: nginx + annotations: {} + certManager: + enabled: false + issuer: letsencrypt-prod + tls: + enabled: false + secretName: omni-siderolink-tls + kubernetesProxy: + enabled: false + host: kubernetes.omni.example.com + ingressClassName: nginx + annotations: {} + certManager: + 
enabled: false + issuer: letsencrypt-prod + tls: + enabled: false + secretName: omni-kubernetes-proxy-tls extraArgs: # - --debug # - --image-factory-address=factory.talos.dev @@ -50,9 +102,42 @@ volumeMounts: mountPath: "/omni.asc" subPath: "omni.asc" readOnly: true +etcd: + # Set to true to use external etcd cluster (enables Deployment with multiple replicas) + # Set to false to use embedded etcd (forces StatefulSet with 1 replica) + external: false + # External etcd configuration (only used when external: true) + endpoints: [] + # - "https://etcd-1.example.com:2379" + # - "https://etcd-2.example.com:2379" + # - "https://etcd-3.example.com:2379" + # Optional: etcd authentication + # username: "" + # password: "" + # Optional: etcd authentication from secret + # auth: + # secretName: "etcd-auth" + # usernameKey: "username" + # passwordKey: "password" + # Optional: TLS configuration for etcd + # tls: + # enabled: false + # certFile: "/etc/etcd/tls/client.crt" + # keyFile: "/etc/etcd/tls/client.key" + # caFile: "/etc/etcd/tls/ca.crt" + # insecureSkipVerify: false + # # Optional: TLS certificates from secret + # secretName: "etcd-tls" + # certKey: "client.crt" + # keyKey: "client.key" + # caKey: "ca.crt" volumes: etcd: + # For Deployment (backwards compatibility): manual PVC name persistentVolumeClaimName: omni-pvc + # For StatefulSet (embedded etcd): automatic PVC provisioning + size: "50Gi" + # storageClass: "" # Use default storage class if not specified tls: secretName: null # tls gpg: From 6b5eba130c7a0d8980210b769c79915e9b521727 Mon Sep 17 00:00:00 2001 From: Jeff Hagadorn Date: Sun, 21 Sep 2025 15:57:37 -0700 Subject: [PATCH 02/11] Update README --- deploy/helm/omni/README.md | 266 +++++++++++++++++++++++++++++++++++++ 1 file changed, 266 insertions(+) diff --git a/deploy/helm/omni/README.md b/deploy/helm/omni/README.md index b1c41b405..750cac16c 100644 --- a/deploy/helm/omni/README.md +++ b/deploy/helm/omni/README.md @@ -42,7 +42,16 @@ Omni is a SaaS-native 
Talos Linux cluster fleet management platform that provide - [Common Issues](#common-issues) - [Logs](#logs) - [Debug Mode](#debug-mode) +- [Migration Guide](#migration-guide) + - [Migrating from Deployment to StatefulSet](#migrating-from-deployment-to-statefulset) + - [Migrating to External etcd](#migrating-to-external-etcd) +- [Configuration Examples](#configuration-examples) + - [Minimal Embedded etcd (StatefulSet)](#minimal-embedded-etcd-statefulset) + - [Minimal External etcd (Deployment)](#minimal-external-etcd-deployment) + - [Production with Ingress](#production-with-ingress) + - [Development/Testing](#developmenttesting) - [Upgrading](#upgrading) + - [Backwards Compatibility](#backwards-compatibility) - [Backup](#backup) - [Upgrade Process](#upgrade-process) - [Uninstalling](#uninstalling) @@ -571,6 +580,263 @@ extraArgs: - --debug ``` +## Migration Guide + +### Migrating from Deployment to StatefulSet + +To migrate an existing Deployment-based installation to StatefulSet (for better storage management): + +1. **Backup etcd data**: +```bash +kubectl exec -n omni-system deployment/omni -- tar -czf /tmp/etcd-backup.tar.gz /_out +kubectl cp omni-system/$(kubectl get pod -n omni-system -l app.kubernetes.io/name=omni -o jsonpath='{.items[0].metadata.name}'):/tmp/etcd-backup.tar.gz ./etcd-backup.tar.gz +``` + +2. **Delete existing Deployment** (this will cause downtime): +```bash +helm uninstall omni --namespace omni-system +kubectl delete pvc omni-pvc --namespace omni-system +``` + +3. **Reinstall with StatefulSet**: +```bash +helm install omni sidero/omni \ + --namespace omni-system \ + --create-namespace \ + --set etcd.external=false \ + --set domainName=your-domain.com \ + --set accountUuid=your-account-uuid +``` + +4. 
**Restore etcd data**: +```bash +kubectl cp ./etcd-backup.tar.gz omni-system/omni-0:/tmp/etcd-backup.tar.gz +kubectl exec -n omni-system omni-0 -- tar -xzf /tmp/etcd-backup.tar.gz -C / +``` + +### Migrating to External etcd + +To migrate from embedded etcd to external etcd: + +1. **Set up external etcd cluster** (outside scope of this guide) + +2. **Backup embedded etcd data**: +```bash +kubectl exec -n omni-system deployment/omni -- tar -czf /tmp/etcd-backup.tar.gz /_out +kubectl cp omni-system/$(kubectl get pod -n omni-system -l app.kubernetes.io/name=omni -o jsonpath='{.items[0].metadata.name}'):/tmp/etcd-backup.tar.gz ./etcd-backup.tar.gz +``` + +3. **Restore data to external etcd** (use etcd restore tools) + +4. **Update Helm values**: +```yaml +etcd: + external: true + endpoints: + - "https://etcd-1.example.com:2379" + - "https://etcd-2.example.com:2379" + - "https://etcd-3.example.com:2379" +deployment: + replicaCount: 3 # Now supports multiple replicas +``` + +5. **Upgrade deployment**: +```bash +helm upgrade omni sidero/omni \ + --namespace omni-system \ + --values values.yaml +``` + +## Configuration Examples + +### Minimal Embedded etcd (StatefulSet) + +```yaml +# values-embedded.yaml +domainName: omni.example.com +accountUuid: "12345678-1234-1234-1234-123456789012" + +auth: + auth0: + enabled: true + clientId: "your-auth0-client-id" + domain: "https://your-auth0-domain" + +etcd: + external: false + +volumes: + etcd: + size: "100Gi" + storageClass: "fast-ssd" + gpg: + secretName: "omni-gpg" +``` + +### Minimal External etcd (Deployment) + +```yaml +# values-external-etcd.yaml +domainName: omni.example.com +accountUuid: "12345678-1234-1234-1234-123456789012" + +deployment: + replicaCount: 3 + +auth: + auth0: + enabled: true + clientId: "your-auth0-client-id" + domain: "https://your-auth0-domain" + +etcd: + external: true + endpoints: + - "https://etcd-1.example.com:2379" + - "https://etcd-2.example.com:2379" + - "https://etcd-3.example.com:2379" + 
username: "omni" + password: "secure-password" + tls: + enabled: true + certFile: "/etc/etcd/tls/client.crt" + keyFile: "/etc/etcd/tls/client.key" + caFile: "/etc/etcd/tls/ca.crt" + +volumes: + gpg: + secretName: "omni-gpg" +``` + +### Production with Ingress + +```yaml +# values-production.yaml +domainName: omni.example.com +accountUuid: "12345678-1234-1234-1234-123456789012" + +deployment: + replicaCount: 3 + +auth: + auth0: + enabled: true + clientId: "your-auth0-client-id" + domain: "https://your-auth0-domain" + +etcd: + external: true + endpoints: + - "https://etcd-1.example.com:2379" + - "https://etcd-2.example.com:2379" + - "https://etcd-3.example.com:2379" + auth: + secretName: "etcd-credentials" + tls: + enabled: true + secretName: "etcd-tls" + +service: + siderolink: + wireguard: + type: LoadBalancer + externalTrafficPolicy: Local + +ingress: + api: + enabled: true + host: omni.example.com + ingressClassName: nginx + certManager: + enabled: true + issuer: letsencrypt-prod + tls: + enabled: true + secretName: omni-api-tls + ui: + enabled: true + host: omni.example.com + ingressClassName: nginx + certManager: + enabled: true + issuer: letsencrypt-prod + tls: + enabled: true + secretName: omni-ui-tls + siderolink: + enabled: true + host: siderolink.omni.example.com + ingressClassName: nginx + certManager: + enabled: true + issuer: letsencrypt-prod + tls: + enabled: true + secretName: omni-siderolink-tls + kubernetesProxy: + enabled: true + host: kubernetes.omni.example.com + ingressClassName: nginx + certManager: + enabled: true + issuer: letsencrypt-prod + tls: + enabled: true + secretName: omni-kubernetes-proxy-tls + +podDisruptionBudget: + enabled: true + minAvailable: 2 + +volumes: + gpg: + secretName: "omni-gpg" + tls: + secretName: "omni-tls" + +resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 2000m + memory: 4Gi +``` + +### Development/Testing + +```yaml +# values-dev.yaml +domainName: omni-dev.example.com +accountUuid: 
"12345678-1234-1234-1234-123456789012" + +auth: + auth0: + enabled: true + clientId: "your-dev-auth0-client-id" + domain: "https://your-dev-auth0-domain" + +etcd: + external: false + +volumes: + etcd: + size: "10Gi" + gpg: + secretName: "omni-gpg-dev" + +resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + +extraArgs: + - --debug +``` + ## Upgrading ### Backwards Compatibility From 6583a1cfb9c1f30466476a20c3cee05d66e96eb2 Mon Sep 17 00:00:00 2001 From: Jeff Hagadorn Date: Sun, 21 Sep 2025 21:43:34 -0700 Subject: [PATCH 03/11] Fix annotation requirements for ingress --- deploy/helm/omni/Chart.yaml | 2 +- deploy/helm/omni/templates/ingress.yaml | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/deploy/helm/omni/Chart.yaml b/deploy/helm/omni/Chart.yaml index 0fbd7899b..899ebc1c3 100644 --- a/deploy/helm/omni/Chart.yaml +++ b/deploy/helm/omni/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: omni description: A helm chart to deploy Omni on a Kubernetes cluster type: application -version: 1.0.0 +version: 1.0.0-rc2 appVersion: "v1.1.4" diff --git a/deploy/helm/omni/templates/ingress.yaml b/deploy/helm/omni/templates/ingress.yaml index b44dce545..f5bb250a5 100644 --- a/deploy/helm/omni/templates/ingress.yaml +++ b/deploy/helm/omni/templates/ingress.yaml @@ -11,11 +11,11 @@ metadata: nginx.ingress.kubernetes.io/backend-protocol: GRPC nginx.ingress.kubernetes.io/proxy-body-size: 32m nginx.ingress.kubernetes.io/service-upstream: "true" - {{- if .Values.ingress.api.certManager.enabled }} + {{- if and .Values.ingress.api.certManager .Values.ingress.api.certManager.enabled }} cert-manager.io/cluster-issuer: {{ .Values.ingress.api.certManager.issuer }} {{- end }} - {{- with .Values.ingress.api.annotations }} - {{- toYaml . 
| nindent 4 }} + {{- if .Values.ingress.api.annotations }} + {{- toYaml .Values.ingress.api.annotations | nindent 4 }} {{- end }} spec: {{- if .Values.ingress.api.ingressClassName }} @@ -113,11 +113,11 @@ metadata: labels: {{- include "omni.labels" . | nindent 4 }} annotations: - {{- if .Values.ingress.ui.certManager.enabled }} + {{- if and .Values.ingress.ui.certManager .Values.ingress.ui.certManager.enabled }} cert-manager.io/cluster-issuer: {{ .Values.ingress.ui.certManager.issuer }} {{- end }} - {{- with .Values.ingress.ui.annotations }} - {{- toYaml . | nindent 4 }} + {{- if .Values.ingress.ui.annotations }} + {{- toYaml .Values.ingress.ui.annotations | nindent 4 }} {{- end }} spec: {{- if .Values.ingress.ui.ingressClassName }} @@ -153,11 +153,11 @@ metadata: {{- include "omni.labels" . | nindent 4 }} annotations: nginx.ingress.kubernetes.io/backend-protocol: GRPC - {{- if .Values.ingress.siderolink.certManager.enabled }} + {{- if and .Values.ingress.siderolink.certManager .Values.ingress.siderolink.certManager.enabled }} cert-manager.io/cluster-issuer: {{ .Values.ingress.siderolink.certManager.issuer }} {{- end }} - {{- with .Values.ingress.siderolink.annotations }} - {{- toYaml . | nindent 4 }} + {{- if .Values.ingress.siderolink.annotations }} + {{- toYaml .Values.ingress.siderolink.annotations | nindent 4 }} {{- end }} spec: {{- if .Values.ingress.siderolink.ingressClassName }} @@ -192,11 +192,11 @@ metadata: labels: {{- include "omni.labels" . | nindent 4 }} annotations: - {{- if .Values.ingress.kubernetesProxy.certManager.enabled }} + {{- if and .Values.ingress.kubernetesProxy.certManager .Values.ingress.kubernetesProxy.certManager.enabled }} cert-manager.io/cluster-issuer: {{ .Values.ingress.kubernetesProxy.certManager.issuer }} {{- end }} - {{- with .Values.ingress.kubernetesProxy.annotations }} - {{- toYaml . 
| nindent 4 }} + {{- if .Values.ingress.kubernetesProxy.annotations }} + {{- toYaml .Values.ingress.kubernetesProxy.annotations | nindent 4 }} {{- end }} spec: {{- if .Values.ingress.kubernetesProxy.ingressClassName }} From 791e070081eab9f8c779895ab86928eecfcde56f Mon Sep 17 00:00:00 2001 From: Jeff Hagadorn Date: Sun, 21 Sep 2025 22:52:24 -0700 Subject: [PATCH 04/11] update with corrected templates --- deploy/helm/omni/Chart.yaml | 2 +- deploy/helm/omni/templates/ingress.yaml | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/deploy/helm/omni/Chart.yaml b/deploy/helm/omni/Chart.yaml index 899ebc1c3..ed9f9ffc5 100644 --- a/deploy/helm/omni/Chart.yaml +++ b/deploy/helm/omni/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: omni description: A helm chart to deploy Omni on a Kubernetes cluster type: application -version: 1.0.0-rc2 +version: 1.0.0-rc7 appVersion: "v1.1.4" diff --git a/deploy/helm/omni/templates/ingress.yaml b/deploy/helm/omni/templates/ingress.yaml index f5bb250a5..23010838f 100644 --- a/deploy/helm/omni/templates/ingress.yaml +++ b/deploy/helm/omni/templates/ingress.yaml @@ -18,7 +18,7 @@ metadata: {{- toYaml .Values.ingress.api.annotations | nindent 4 }} {{- end }} spec: - {{- if .Values.ingress.api.ingressClassName }} + {{- if and .Values.ingress.api.ingressClassName (ne .Values.ingress.api.ingressClassName "") }} ingressClassName: {{ .Values.ingress.api.ingressClassName }} {{- end }} rules: @@ -95,7 +95,7 @@ spec: number: 8080 path: /oicd. 
pathType: ImplementationSpecific - {{- if .Values.ingress.api.tls.enabled }} + {{- if and .Values.ingress.api.tls .Values.ingress.api.tls.enabled }} tls: - hosts: - {{ .Values.ingress.api.host }} @@ -120,7 +120,7 @@ metadata: {{- toYaml .Values.ingress.ui.annotations | nindent 4 }} {{- end }} spec: - {{- if .Values.ingress.ui.ingressClassName }} + {{- if and .Values.ingress.ui.ingressClassName (ne .Values.ingress.ui.ingressClassName "") }} ingressClassName: {{ .Values.ingress.ui.ingressClassName }} {{- end }} rules: @@ -134,7 +134,7 @@ spec: number: 8080 path: / pathType: Prefix - {{- if .Values.ingress.ui.tls.enabled }} + {{- if and .Values.ingress.ui.tls .Values.ingress.ui.tls.enabled }} tls: - hosts: - {{ .Values.ingress.ui.host }} @@ -160,7 +160,7 @@ metadata: {{- toYaml .Values.ingress.siderolink.annotations | nindent 4 }} {{- end }} spec: - {{- if .Values.ingress.siderolink.ingressClassName }} + {{- if and .Values.ingress.siderolink.ingressClassName (ne .Values.ingress.siderolink.ingressClassName "") }} ingressClassName: {{ .Values.ingress.siderolink.ingressClassName }} {{- end }} rules: @@ -174,7 +174,7 @@ spec: number: 8090 path: / pathType: ImplementationSpecific - {{- if .Values.ingress.siderolink.tls.enabled }} + {{- if and .Values.ingress.siderolink.tls .Values.ingress.siderolink.tls.enabled }} tls: - hosts: - {{ .Values.ingress.siderolink.host }} @@ -199,7 +199,7 @@ metadata: {{- toYaml .Values.ingress.kubernetesProxy.annotations | nindent 4 }} {{- end }} spec: - {{- if .Values.ingress.kubernetesProxy.ingressClassName }} + {{- if and .Values.ingress.kubernetesProxy.ingressClassName (ne .Values.ingress.kubernetesProxy.ingressClassName "") }} ingressClassName: {{ .Values.ingress.kubernetesProxy.ingressClassName }} {{- end }} rules: @@ -213,7 +213,7 @@ spec: number: 8095 path: / pathType: ImplementationSpecific - - host: {{ printf "*.%s" .Values.ingress.kubernetesProxy.host }} + - host: "{{ printf "*.%s" .Values.ingress.kubernetesProxy.host }}" http: 
paths: - backend: @@ -223,11 +223,11 @@ spec: number: 8095 path: / pathType: ImplementationSpecific - {{- if .Values.ingress.kubernetesProxy.tls.enabled }} + {{- if and .Values.ingress.kubernetesProxy.tls .Values.ingress.kubernetesProxy.tls.enabled }} tls: - hosts: - {{ .Values.ingress.kubernetesProxy.host }} - - {{ printf "*.%s" .Values.ingress.kubernetesProxy.host }} + - "{{ printf "*.%s" .Values.ingress.kubernetesProxy.host }}" secretName: {{ .Values.ingress.kubernetesProxy.tls.secretName }} {{- end }} {{- end }} \ No newline at end of file From 2c8c1081f17904800ff30a1c3b89bfa000f8b662 Mon Sep 17 00:00:00 2001 From: Jeff Hagadorn Date: Tue, 23 Sep 2025 22:57:42 -0700 Subject: [PATCH 05/11] Update README and remove unused values --- deploy/helm/omni/Chart.yaml | 2 +- deploy/helm/omni/README.md | 46 +++++++++++++-------- deploy/helm/omni/templates/deployment.yaml | 3 ++ deploy/helm/omni/templates/statefulset.yaml | 3 ++ 4 files changed, 35 insertions(+), 19 deletions(-) diff --git a/deploy/helm/omni/Chart.yaml b/deploy/helm/omni/Chart.yaml index ed9f9ffc5..b7dccf56b 100644 --- a/deploy/helm/omni/Chart.yaml +++ b/deploy/helm/omni/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: omni description: A helm chart to deploy Omni on a Kubernetes cluster type: application -version: 1.0.0-rc7 +version: 1.0.0-rc9 appVersion: "v1.1.4" diff --git a/deploy/helm/omni/README.md b/deploy/helm/omni/README.md index 750cac16c..dc3a58e96 100644 --- a/deploy/helm/omni/README.md +++ b/deploy/helm/omni/README.md @@ -588,8 +588,14 @@ To migrate an existing Deployment-based installation to StatefulSet (for better 1. 
**Backup etcd data**: ```bash -kubectl exec -n omni-system deployment/omni -- tar -czf /tmp/etcd-backup.tar.gz /_out -kubectl cp omni-system/$(kubectl get pod -n omni-system -l app.kubernetes.io/name=omni -o jsonpath='{.items[0].metadata.name}'):/tmp/etcd-backup.tar.gz ./etcd-backup.tar.gz +# Forward the etcd port +kubectl port-forward -n omni-system pod/omni-0 2379:2379 & + +# Create etcd snapshot using local etcdctl +etcdctl snapshot save omni-etcd-backup.db --endpoints=http://localhost:2379 + +# Stop port forwarding +kill %1 ``` 2. **Delete existing Deployment** (this will cause downtime): @@ -608,11 +614,7 @@ helm install omni sidero/omni \ --set accountUuid=your-account-uuid ``` -4. **Restore etcd data**: -```bash -kubectl cp ./etcd-backup.tar.gz omni-system/omni-0:/tmp/etcd-backup.tar.gz -kubectl exec -n omni-system omni-0 -- tar -xzf /tmp/etcd-backup.tar.gz -C / -``` +4. **Restore etcd data** (use etcd restore tools with the snapshot file) ### Migrating to External etcd @@ -622,8 +624,14 @@ To migrate from embedded etcd to external etcd: 2. **Backup embedded etcd data**: ```bash -kubectl exec -n omni-system deployment/omni -- tar -czf /tmp/etcd-backup.tar.gz /_out -kubectl cp omni-system/$(kubectl get pod -n omni-system -l app.kubernetes.io/name=omni -o jsonpath='{.items[0].metadata.name}'):/tmp/etcd-backup.tar.gz ./etcd-backup.tar.gz +# Forward the etcd port +kubectl port-forward -n omni-system pod/omni-0 2379:2379 & + +# Create etcd snapshot using local etcdctl +etcdctl snapshot save omni-etcd-backup.db --endpoints=http://localhost:2379 + +# Stop port forwarding +kill %1 ``` 3. 
**Restore data to external etcd** (use etcd restore tools) @@ -849,20 +857,22 @@ Upgrading from previous chart versions is fully supported: ### Backup -Before upgrading, backup the etcd data: +Before upgrading, backup the embedded etcd data: -**For Deployment-based installations**: +**For embedded etcd (recommended method)**: ```bash -kubectl exec -n omni-system deployment/omni -- tar -czf /tmp/etcd-backup.tar.gz /_out -kubectl cp omni-system/$(kubectl get pod -n omni-system -l app.kubernetes.io/name=omni -o jsonpath='{.items[0].metadata.name}'):/tmp/etcd-backup.tar.gz ./etcd-backup.tar.gz -``` +# Forward the etcd port +kubectl port-forward -n omni-system pod/omni-0 2379:2379 & -**For StatefulSet-based installations**: -```bash -kubectl exec -n omni-system statefulset/omni -- tar -czf /tmp/etcd-backup.tar.gz /_out -kubectl cp omni-system/omni-0:/tmp/etcd-backup.tar.gz ./etcd-backup.tar.gz +# Create etcd snapshot using local etcdctl +etcdctl snapshot save omni-etcd-backup.db --endpoints=http://localhost:2379 + +# Stop port forwarding +kill %1 ``` + + ### Upgrade Process ```bash diff --git a/deploy/helm/omni/templates/deployment.yaml b/deploy/helm/omni/templates/deployment.yaml index 2785b0ddc..ff94b53d9 100644 --- a/deploy/helm/omni/templates/deployment.yaml +++ b/deploy/helm/omni/templates/deployment.yaml @@ -55,6 +55,9 @@ spec: - name: wireguard containerPort: 50180 protocol: UDP + - name: embedded-etcd + containerPort: 2379 + protocol: TCP securityContext: allowPrivilegeEscalation: false capabilities: diff --git a/deploy/helm/omni/templates/statefulset.yaml b/deploy/helm/omni/templates/statefulset.yaml index 6ee351972..16ca306a8 100644 --- a/deploy/helm/omni/templates/statefulset.yaml +++ b/deploy/helm/omni/templates/statefulset.yaml @@ -54,6 +54,9 @@ spec: - name: wireguard containerPort: 50180 protocol: UDP + - name: embedded-etcd + containerPort: 2379 + protocol: TCP securityContext: allowPrivilegeEscalation: false capabilities: From 
5192ee67ce89e1d1e60d5336db32970cb29284cc Mon Sep 17 00:00:00 2001 From: Jeff Hagadorn Date: Tue, 23 Sep 2025 23:30:31 -0700 Subject: [PATCH 06/11] Update external etcd options --- deploy/helm/omni/README.md | 8 ++++++++ deploy/helm/omni/templates/deployment.yaml | 24 ++++++++++++++-------- deploy/helm/omni/values.yaml | 8 +++++++- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/deploy/helm/omni/README.md b/deploy/helm/omni/README.md index dc3a58e96..a8cd6350a 100644 --- a/deploy/helm/omni/README.md +++ b/deploy/helm/omni/README.md @@ -321,8 +321,11 @@ service: | `etcd.username` | etcd username (direct) | `""` | | `etcd.password` | etcd password (direct) | `""` | | `etcd.auth.secretName` | Secret containing etcd credentials | `""` | +| `etcd.dialKeepAliveTime` | etcd client keep-alive time | `""` | +| `etcd.dialKeepAliveTimeout` | etcd client keep-alive timeout | `""` | | `etcd.tls.enabled` | Enable TLS for etcd | `false` | | `etcd.tls.secretName` | Secret containing TLS certificates | `""` | +| `etcd.publicKeyFiles` | List of public key files for encryption | `[]` | #### etcd Authentication @@ -740,9 +743,14 @@ etcd: - "https://etcd-3.example.com:2379" auth: secretName: "etcd-credentials" + dialKeepAliveTime: "30s" + dialKeepAliveTimeout: "5s" tls: enabled: true secretName: "etcd-tls" + publicKeyFiles: + - "/etc/omni/keys/public1.pem" + - "/etc/omni/keys/public2.pem" service: siderolink: diff --git a/deploy/helm/omni/templates/deployment.yaml b/deploy/helm/omni/templates/deployment.yaml index ff94b53d9..e8d8ac788 100644 --- a/deploy/helm/omni/templates/deployment.yaml +++ b/deploy/helm/omni/templates/deployment.yaml @@ -133,18 +133,21 @@ spec: {{- end }} {{- if and .Values.etcd.tls .Values.etcd.tls.enabled }} {{- if .Values.etcd.tls.secretName }} - - --etcd-tls-cert-file=/etc/etcd/tls/{{ .Values.etcd.tls.certKey | default "client.crt" }} - - --etcd-tls-key-file=/etc/etcd/tls/{{ .Values.etcd.tls.keyKey | default "client.key" }} - - 
--etcd-tls-ca-file=/etc/etcd/tls/{{ .Values.etcd.tls.caKey | default "ca.crt" }} + - --etcd-client-cert-path=/etc/etcd/tls/{{ .Values.etcd.tls.certKey | default "client.crt" }} + - --etcd-client-key-path=/etc/etcd/tls/{{ .Values.etcd.tls.keyKey | default "client.key" }} + - --etcd-ca-path=/etc/etcd/tls/{{ .Values.etcd.tls.caKey | default "ca.crt" }} {{- else }} - - --etcd-tls-cert-file={{ .Values.etcd.tls.certFile }} - - --etcd-tls-key-file={{ .Values.etcd.tls.keyFile }} - - --etcd-tls-ca-file={{ .Values.etcd.tls.caFile }} - {{- end }} - {{- if .Values.etcd.tls.insecureSkipVerify }} - - --etcd-tls-insecure-skip-verify + - --etcd-client-cert-path={{ .Values.etcd.tls.certFile }} + - --etcd-client-key-path={{ .Values.etcd.tls.keyFile }} + - --etcd-ca-path={{ .Values.etcd.tls.caFile }} {{- end }} {{- end }} + {{- if .Values.etcd.dialKeepAliveTime }} + - --etcd-dial-keepalive-time={{ .Values.etcd.dialKeepAliveTime }} + {{- end }} + {{- if .Values.etcd.dialKeepAliveTimeout }} + - --etcd-dial-keepalive-timeout={{ .Values.etcd.dialKeepAliveTimeout }} + {{- end }} {{- else }} - --etcd-embedded=true {{- end }} @@ -153,6 +156,9 @@ spec: {{- end }} - --name={{ .Values.name}} - --private-key-source={{ .Values.privateKeySource }} + {{- if and .Values.etcd.external .Values.etcd.publicKeyFiles }} + - --public-key-files={{ join "," .Values.etcd.publicKeyFiles }} + {{- end }} - --siderolink-api-advertised-url={{ printf "https://%s" .Values.service.siderolink.domainName }} - --siderolink-wireguard-advertised-addr={{ if .Values.service.siderolink.wireguard.address }}{{ .Values.service.siderolink.wireguard.address }}{{ else }}{{ printf "%s.%s.svc.cluster.local" "wireguard" .Release.Namespace }}{{ end }}:{{ .Values.service.siderolink.wireguard.port }} {{- range $value := .Values.extraArgs }} diff --git a/deploy/helm/omni/values.yaml b/deploy/helm/omni/values.yaml index 21531d687..09ce74163 100644 --- a/deploy/helm/omni/values.yaml +++ b/deploy/helm/omni/values.yaml @@ -119,18 +119,24 @@ 
etcd: # secretName: "etcd-auth" # usernameKey: "username" # passwordKey: "password" + # Optional: etcd client connection timeouts + # dialKeepAliveTime: "30s" + # dialKeepAliveTimeout: "5s" # Optional: TLS configuration for etcd # tls: # enabled: false # certFile: "/etc/etcd/tls/client.crt" # keyFile: "/etc/etcd/tls/client.key" # caFile: "/etc/etcd/tls/ca.crt" - # insecureSkipVerify: false # # Optional: TLS certificates from secret # secretName: "etcd-tls" # certKey: "client.crt" # keyKey: "client.key" # caKey: "ca.crt" + # Optional: public key files for encryption + # publicKeyFiles: [] + # # - "/etc/omni/keys/public1.pem" + # # - "/etc/omni/keys/public2.pem" volumes: etcd: # For Deployment (backwards compatibility): manual PVC name From 58252a677b584cdd2e50a146c5ec5940a69eaa21 Mon Sep 17 00:00:00 2001 From: Jeff Hagadorn Date: Mon, 29 Sep 2025 21:26:51 -0700 Subject: [PATCH 07/11] Fix etcd tls values in chart to match kubernetes tls cert format --- deploy/helm/omni/Chart.yaml | 2 +- deploy/helm/omni/README.md | 8 ++++---- deploy/helm/omni/templates/deployment.yaml | 19 +++++++++++-------- deploy/helm/omni/values.yaml | 9 +++++---- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/deploy/helm/omni/Chart.yaml b/deploy/helm/omni/Chart.yaml index b7dccf56b..d53b79b9e 100644 --- a/deploy/helm/omni/Chart.yaml +++ b/deploy/helm/omni/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: omni description: A helm chart to deploy Omni on a Kubernetes cluster type: application -version: 1.0.0-rc9 +version: 1.0.0-rc10 appVersion: "v1.1.4" diff --git a/deploy/helm/omni/README.md b/deploy/helm/omni/README.md index a8cd6350a..a1fa69739 100644 --- a/deploy/helm/omni/README.md +++ b/deploy/helm/omni/README.md @@ -143,7 +143,7 @@ volumes: ```yaml volumes: etcd: - persistentVolumeClaimName: omni-pvc # Must exist before deployment + persistentVolumeClaimName: omni-pvc # Set to your existing PVC name ``` **Critical Limitation**: Embedded etcd is hardcoded to 1 replica because 
Omni's embedded etcd does not support clustering. The `deployment.replicaCount` setting is ignored when using embedded etcd. Attempting to scale beyond 1 replica would result in data corruption and split-brain scenarios. @@ -304,7 +304,7 @@ service: | Parameter | Description | Default | |-----------|-------------|---------| -| `volumes.etcd.persistentVolumeClaimName` | etcd PVC name (existing deployments) | `omni-pvc` | +| `volumes.etcd.persistentVolumeClaimName` | etcd PVC name (existing deployments only) | `null` | | `volumes.etcd.size` | etcd storage size (StatefulSet only) | `"50Gi"` | | `volumes.etcd.storageClass` | Storage class for etcd PVC (optional) | `""` | | `volumes.tls.secretName` | TLS secret name | `null` | @@ -363,8 +363,8 @@ etcd: tls: enabled: true secretName: "etcd-tls" - certKey: "client.crt" # optional, defaults to "client.crt" - keyKey: "client.key" # optional, defaults to "client.key" + certKey: "tls.crt" # optional, defaults to "tls.crt" + keyKey: "tls.key" # optional, defaults to "tls.key" caKey: "ca.crt" # optional, defaults to "ca.crt" ``` diff --git a/deploy/helm/omni/templates/deployment.yaml b/deploy/helm/omni/templates/deployment.yaml index e8d8ac788..ac9f977df 100644 --- a/deploy/helm/omni/templates/deployment.yaml +++ b/deploy/helm/omni/templates/deployment.yaml @@ -88,9 +88,11 @@ spec: mountPath: {{ .Values.volumeMounts.omniAsc.mountPath }} subPath: {{ .Values.volumeMounts.omniAsc.subPath }} readOnly: {{ .Values.volumeMounts.omniAsc.readOnly }} + {{- if .Values.volumes.etcd.persistentVolumeClaimName }} - name: etcd mountPath: /_out - {{- if and .Values.etcd.external .Values.etcd.tls .Values.etcd.tls.secretName }} + {{- end }} + {{- if and .Values.etcd.external .Values.etcd.tls.enabled .Values.etcd.tls.secretName }} - name: etcd-tls mountPath: /etc/etcd/tls readOnly: true @@ -120,6 +122,7 @@ spec: - --key=/etc/omni/tls/tls.key {{- end }} {{- if .Values.etcd.external }} + - --etcd-embedded=false - --etcd-endpoints={{ join "," 
.Values.etcd.endpoints }} {{- if .Values.etcd.username }} - --etcd-username={{ .Values.etcd.username }} @@ -131,16 +134,14 @@ spec: {{- else if and .Values.etcd.auth .Values.etcd.auth.secretName }} - --etcd-password=$(ETCD_PASSWORD) {{- end }} - {{- if and .Values.etcd.tls .Values.etcd.tls.enabled }} - {{- if .Values.etcd.tls.secretName }} - - --etcd-client-cert-path=/etc/etcd/tls/{{ .Values.etcd.tls.certKey | default "client.crt" }} - - --etcd-client-key-path=/etc/etcd/tls/{{ .Values.etcd.tls.keyKey | default "client.key" }} + {{- if and .Values.etcd.tls.enabled .Values.etcd.tls.secretName }} + - --etcd-client-cert-path=/etc/etcd/tls/{{ .Values.etcd.tls.certKey | default "tls.crt" }} + - --etcd-client-key-path=/etc/etcd/tls/{{ .Values.etcd.tls.keyKey | default "tls.key" }} - --etcd-ca-path=/etc/etcd/tls/{{ .Values.etcd.tls.caKey | default "ca.crt" }} - {{- else }} + {{- else if and .Values.etcd.tls.enabled .Values.etcd.tls.certFile }} - --etcd-client-cert-path={{ .Values.etcd.tls.certFile }} - --etcd-client-key-path={{ .Values.etcd.tls.keyFile }} - --etcd-ca-path={{ .Values.etcd.tls.caFile }} - {{- end }} {{- end }} {{- if .Values.etcd.dialKeepAliveTime }} - --etcd-dial-keepalive-time={{ .Values.etcd.dialKeepAliveTime }} @@ -173,10 +174,12 @@ spec: - name: omni-asc secret: secretName: {{ .Values.volumes.gpg.secretName }} + {{- if .Values.volumes.etcd.persistentVolumeClaimName }} - name: etcd persistentVolumeClaim: claimName: {{ .Values.volumes.etcd.persistentVolumeClaimName }} - {{- if and .Values.etcd.external .Values.etcd.tls .Values.etcd.tls.secretName }} + {{- end }} + {{- if and .Values.etcd.external .Values.etcd.tls.enabled .Values.etcd.tls.secretName }} - name: etcd-tls secret: secretName: {{ .Values.etcd.tls.secretName }} diff --git a/deploy/helm/omni/values.yaml b/deploy/helm/omni/values.yaml index 09ce74163..a0a1bc622 100644 --- a/deploy/helm/omni/values.yaml +++ b/deploy/helm/omni/values.yaml @@ -111,6 +111,7 @@ etcd: # - 
"https://etcd-1.example.com:2379" # - "https://etcd-2.example.com:2379" # - "https://etcd-3.example.com:2379" + # - "etcd.omni-etcd.svc.cluster.local:2379" # Internal service example # Optional: etcd authentication # username: "" # password: "" @@ -130,9 +131,9 @@ etcd: # caFile: "/etc/etcd/tls/ca.crt" # # Optional: TLS certificates from secret # secretName: "etcd-tls" - # certKey: "client.crt" - # keyKey: "client.key" - # caKey: "ca.crt" + # certKey: "tls.crt" # defaults to "tls.crt" + # keyKey: "tls.key" # defaults to "tls.key" + # caKey: "ca.crt" # defaults to "ca.crt" # Optional: public key files for encryption # publicKeyFiles: [] # # - "/etc/omni/keys/public1.pem" @@ -140,7 +141,7 @@ etcd: volumes: etcd: # For Deployment (backwards compatibility): manual PVC name - persistentVolumeClaimName: omni-pvc + persistentVolumeClaimName: null # Set to PVC name if using existing Deployment with manual PVC # For StatefulSet (embedded etcd): automatic PVC provisioning size: "50Gi" # storageClass: "" # Use default storage class if not specified From e532323ae48f51d5e1edba3f40c4f2a63267e8cb Mon Sep 17 00:00:00 2001 From: Jeff Hagadorn Date: Fri, 10 Oct 2025 23:05:44 -0700 Subject: [PATCH 08/11] Revert replica count to one - wireguard doesn't handle load balancing well. 
--- deploy/helm/omni/Chart.yaml | 4 +- deploy/helm/omni/README.md | 47 ++++++------------- deploy/helm/omni/templates/deployment.yaml | 2 +- .../omni/templates/poddisruptionbudget.yaml | 20 -------- deploy/helm/omni/templates/statefulset.yaml | 2 +- deploy/helm/omni/values.yaml | 10 ++-- 6 files changed, 22 insertions(+), 63 deletions(-) delete mode 100644 deploy/helm/omni/templates/poddisruptionbudget.yaml diff --git a/deploy/helm/omni/Chart.yaml b/deploy/helm/omni/Chart.yaml index d53b79b9e..6b85cb58c 100644 --- a/deploy/helm/omni/Chart.yaml +++ b/deploy/helm/omni/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: omni description: A helm chart to deploy Omni on a Kubernetes cluster type: application -version: 1.0.0-rc10 -appVersion: "v1.1.4" +version: 1.0.0-rc11 +appVersion: "v1.2.1" diff --git a/deploy/helm/omni/README.md b/deploy/helm/omni/README.md index a1fa69739..98da2d4a3 100644 --- a/deploy/helm/omni/README.md +++ b/deploy/helm/omni/README.md @@ -4,7 +4,7 @@ A Helm chart for deploying Sidero Omni on Kubernetes clusters. ## Overview -Omni is a SaaS-native Talos Linux cluster fleet management platform that provides centralized management, monitoring, and orchestration capabilities for Talos Linux clusters. This Helm chart deploys Omni as a containerized application on Kubernetes with support for both embedded and external etcd configurations, automatic scaling, and comprehensive ingress management. +Omni is a SaaS-native Talos Linux cluster fleet management platform that provides centralized management, monitoring, and orchestration capabilities for Talos Linux clusters. This Helm chart deploys Omni as a containerized application on Kubernetes with support for both embedded and external etcd configurations and comprehensive ingress management. Note: Omni only supports single-replica deployments due to WireGuard networking requirements. 
## Table of Contents @@ -146,17 +146,17 @@ volumes: persistentVolumeClaimName: omni-pvc # Set to your existing PVC name ``` -**Critical Limitation**: Embedded etcd is hardcoded to 1 replica because Omni's embedded etcd does not support clustering. The `deployment.replicaCount` setting is ignored when using embedded etcd. Attempting to scale beyond 1 replica would result in data corruption and split-brain scenarios. +**Critical Limitation**: Omni is hardcoded to 1 replica due to WireGuard networking requirements. Omni does not support high availability or horizontal scaling regardless of etcd configuration. **When to use embedded etcd**: -- Single-instance deployments +- Simple deployments with automatic storage provisioning - Development and testing environments -- Small-scale production deployments where high availability is provided at the infrastructure level +- Production deployments where external etcd is not required **When to use external etcd**: -- Multi-replica deployments for high availability -- Large-scale production environments -- When you need horizontal scaling capabilities +- When you need to manage etcd separately from Omni +- Shared etcd clusters across multiple applications +- Advanced etcd configurations #### External etcd @@ -171,7 +171,7 @@ etcd: - "https://etcd-3.example.com:2379" ``` -This configuration enables horizontal scaling with `deployment.replicaCount > 1`. +Note: Omni still runs as a single replica even with external etcd due to WireGuard limitations. 
### Security Configuration @@ -235,7 +235,7 @@ kubectl create secret tls tls-secret \ |-----------|-------------|---------| | `deployment.image` | Container image repository | `ghcr.io/siderolabs/omni` | | `deployment.tag` | Container image tag | `"latest"` | -| `deployment.replicaCount` | Number of replicas | `1` | + | `deployment.imagePullPolicy` | Image pull policy | `IfNotPresent` | | `deployment.annotations` | Deployment annotations | `{}` | @@ -409,11 +409,7 @@ The Kubernetes proxy ingress automatically creates a wildcard rule (`*.kubernete ### Pod Disruption Budget -```yaml -podDisruptionBudget: - enabled: true - minAvailable: 1 # or use maxUnavailable -``` +Pod Disruption Budget is not applicable since Omni only supports single-replica deployments. ### Per-Service Annotations @@ -471,14 +467,12 @@ The chart automatically chooses the appropriate Kubernetes resource based on dep 5. **Resource type changes** → Only occur when switching etcd modes and no existing resource conflicts **StatefulSet Benefits** (new deployments only): -- Automatic PVC provisioning per replica +- Automatic PVC provisioning - Stable network identities - Ordered deployment and scaling -- Limited to 1 replica (embedded etcd constraint) **Deployment Benefits**: - Backwards compatibility with existing installations -- Horizontal scaling when using external etcd - Simpler storage management for external etcd scenarios ### WireGuard Address Resolution @@ -548,10 +542,9 @@ Consider implementing network policies to restrict traffic to Omni services base #### Scaling Issues -1. **StatefulSet Scaling**: Cannot scale beyond 1 replica with embedded etcd - this is enforced by the chart -2. **Replica Count Ignored**: `deployment.replicaCount > 1` is ignored when `etcd.external: false` -3. **External etcd Required**: Use `etcd.external: true` for multiple replicas -4. **Data Corruption Risk**: Never attempt to manually scale the StatefulSet beyond 1 replica +1. 
**Single Replica Only**: Omni only supports 1 replica due to WireGuard networking limitations +2. **No High Availability**: Omni cannot be deployed in a highly available configuration +3. **StatefulSet/Deployment**: Both resource types are limited to 1 replica #### Service Connectivity @@ -647,8 +640,6 @@ etcd: - "https://etcd-1.example.com:2379" - "https://etcd-2.example.com:2379" - "https://etcd-3.example.com:2379" -deployment: - replicaCount: 3 # Now supports multiple replicas ``` 5. **Upgrade deployment**: @@ -691,9 +682,6 @@ volumes: domainName: omni.example.com accountUuid: "12345678-1234-1234-1234-123456789012" -deployment: - replicaCount: 3 - auth: auth0: enabled: true @@ -726,9 +714,6 @@ volumes: domainName: omni.example.com accountUuid: "12345678-1234-1234-1234-123456789012" -deployment: - replicaCount: 3 - auth: auth0: enabled: true @@ -800,9 +785,7 @@ ingress: enabled: true secretName: omni-kubernetes-proxy-tls -podDisruptionBudget: - enabled: true - minAvailable: 2 +# Pod disruption budget not applicable for single-replica deployments volumes: gpg: diff --git a/deploy/helm/omni/templates/deployment.yaml b/deploy/helm/omni/templates/deployment.yaml index ac9f977df..679642c33 100644 --- a/deploy/helm/omni/templates/deployment.yaml +++ b/deploy/helm/omni/templates/deployment.yaml @@ -18,7 +18,7 @@ metadata: spec: strategy: type: Recreate - replicas: {{ .Values.deployment.replicaCount }} + replicas: 1 selector: matchLabels: {{- include "omni.selectorLabels" . | nindent 6 }} template: diff --git a/deploy/helm/omni/templates/poddisruptionbudget.yaml b/deploy/helm/omni/templates/poddisruptionbudget.yaml deleted file mode 100644 index 3e7256f36..000000000 --- a/deploy/helm/omni/templates/poddisruptionbudget.yaml +++ /dev/null @@ -1,20 +0,0 @@ -{{- if .Values.podDisruptionBudget.enabled }} ---- -apiVersion: policy/v1 -kind: PodDisruptionBudget -metadata: - name: {{ include "omni.name" . 
}} - namespace: {{ .Release.Namespace }} - labels: - {{- include "omni.labels" . | nindent 4 }} -spec: - {{- if .Values.podDisruptionBudget.minAvailable }} - minAvailable: {{ .Values.podDisruptionBudget.minAvailable }} - {{- end }} - {{- if .Values.podDisruptionBudget.maxUnavailable }} - maxUnavailable: {{ .Values.podDisruptionBudget.maxUnavailable }} - {{- end }} - selector: - matchLabels: - {{- include "omni.selectorLabels" . | nindent 6 }} -{{- end }} \ No newline at end of file diff --git a/deploy/helm/omni/templates/statefulset.yaml b/deploy/helm/omni/templates/statefulset.yaml index 16ca306a8..a937e26ac 100644 --- a/deploy/helm/omni/templates/statefulset.yaml +++ b/deploy/helm/omni/templates/statefulset.yaml @@ -17,7 +17,7 @@ metadata: {{- toYaml .Values.deployment.annotations | nindent 4 }} spec: serviceName: {{ include "omni.name" . }} - replicas: 1 # Embedded etcd only supports single instance + replicas: 1 selector: matchLabels: {{- include "omni.selectorLabels" . | nindent 6 }} template: diff --git a/deploy/helm/omni/values.yaml b/deploy/helm/omni/values.yaml index a0a1bc622..312b81f79 100644 --- a/deploy/helm/omni/values.yaml +++ b/deploy/helm/omni/values.yaml @@ -5,7 +5,6 @@ privateKeySource: "file:///omni.asc" deployment: image: ghcr.io/siderolabs/omni tag: "latest" - replicaCount: 1 annotations: {} imagePullPolicy: IfNotPresent auth: @@ -41,10 +40,7 @@ resources: # Required for /dev/net/tun # https://www.talos.dev/v1.8/kubernetes-guides/configuration/device-plugins/ squat.ai/tun: 1 -podDisruptionBudget: - enabled: false - minAvailable: 1 - # maxUnavailable: 1 + ingress: api: enabled: false @@ -103,8 +99,8 @@ volumeMounts: subPath: "omni.asc" readOnly: true etcd: - # Set to true to use external etcd cluster (enables Deployment with multiple replicas) - # Set to false to use embedded etcd (forces StatefulSet with 1 replica) + # Set to true to use external etcd cluster + # Set to false to use embedded etcd (default) external: false # External etcd 
configuration (only used when external: true) endpoints: [] From be52d10617e3b713e52627aad0d134b3a5420166 Mon Sep 17 00:00:00 2001 From: Jeff Hagadorn Date: Mon, 13 Oct 2025 16:22:57 -0700 Subject: [PATCH 09/11] README updated --- deploy/helm/omni/README.md | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/deploy/helm/omni/README.md b/deploy/helm/omni/README.md index 98da2d4a3..990206ac6 100644 --- a/deploy/helm/omni/README.md +++ b/deploy/helm/omni/README.md @@ -127,7 +127,7 @@ auth: #### Embedded etcd (Default) -**New Deployments**: When using embedded etcd (`etcd.external: false`), Omni is deployed as a StatefulSet with automatic PVC provisioning: +**Automatic PVC Provisioning** (for embedded etcd): ```yaml etcd: @@ -138,12 +138,12 @@ volumes: storageClass: "fast-ssd" # optional ``` -**Existing Deployments**: Continue using Deployment with manual PVC management: +**Manual PVC Management** (for external etcd or existing deployments): ```yaml volumes: etcd: - persistentVolumeClaimName: omni-pvc # Set to your existing PVC name + persistentVolumeClaimName: omni-pvc # Set to your PVC name ``` **Critical Limitation**: Omni is hardcoded to 1 replica due to WireGuard networking requirements. Omni does not support high availability or horizontal scaling regardless of etcd configuration. 
@@ -436,22 +436,14 @@ service: | `customVolumes` | Additional volumes | `[]` | | `customVolumeMounts` | Additional volume mounts | `[]` | -## Backwards Compatibility +## Resource Selection -The chart maintains full backwards compatibility with existing deployments: +The chart automatically chooses the appropriate Kubernetes resource based on etcd configuration: -**Existing Deployments**: -- Charts deployed with previous versions continue using Deployment resources -- Storage configuration remains unchanged (manual PVC management) -- No disruption during upgrades -- `etcd.external` setting is ignored for existing deployments - -**New Deployments**: -- `etcd.external: false` (default) → StatefulSet with automatic PVC provisioning +- `etcd.external: false` (default) → StatefulSet with embedded etcd and automatic PVC provisioning - `etcd.external: true` → Deployment for external etcd clusters -**Detection Logic**: -The chart uses Helm's `lookup` function to detect existing Deployment resources and automatically maintains compatibility. +The chart uses Helm's `lookup` function to detect existing resources and maintains compatibility with previous deployments. ## Architecture Decisions @@ -460,19 +452,18 @@ The chart uses Helm's `lookup` function to detect existing Deployment resources The chart automatically chooses the appropriate Kubernetes resource based on deployment history and etcd configuration: **Resource Selection Logic**: -1. **Existing Deployment detected** → Continue using Deployment (backwards compatibility) -2. **Existing StatefulSet detected** → Continue using StatefulSet (backwards compatibility) +1. **Existing Deployment detected** → Continue using Deployment +2. **Existing StatefulSet detected** → Continue using StatefulSet 3. **New deployment + `etcd.external: false`** → Use StatefulSet with embedded etcd 4. **New deployment + `etcd.external: true`** → Use Deployment with external etcd -5. 
**Resource type changes** → Only occur when switching etcd modes and no existing resource conflicts -**StatefulSet Benefits** (new deployments only): +**StatefulSet Benefits**: - Automatic PVC provisioning - Stable network identities - Ordered deployment and scaling **Deployment Benefits**: -- Backwards compatibility with existing installations +- Compatibility with existing installations - Simpler storage management for external etcd scenarios ### WireGuard Address Resolution @@ -838,12 +829,11 @@ extraArgs: ## Upgrading -### Backwards Compatibility +### Compatibility -Upgrading from previous chart versions is fully supported: +The chart maintains compatibility with existing deployments: -- **Existing Deployments**: Continue using the same Deployment resource and storage configuration -- **No Resource Changes**: The chart automatically detects existing deployments and maintains compatibility +- **Resource Detection**: Automatically detects existing Deployment or StatefulSet resources - **Configuration Preserved**: All existing values and storage remain unchanged ### Backup From 49832285d0c25fed3760d115cc997fdd7f52079b Mon Sep 17 00:00:00 2001 From: Jeff Hagadorn Date: Mon, 13 Oct 2025 16:25:31 -0700 Subject: [PATCH 10/11] Set chart version to 1.0 --- deploy/helm/omni/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/helm/omni/Chart.yaml b/deploy/helm/omni/Chart.yaml index 6b85cb58c..d7faef4be 100644 --- a/deploy/helm/omni/Chart.yaml +++ b/deploy/helm/omni/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: omni description: A helm chart to deploy Omni on a Kubernetes cluster type: application -version: 1.0.0-rc11 +version: 1.0.0 appVersion: "v1.2.1" From f53360a7561617c7ce28715ddfa75702d6bdba35 Mon Sep 17 00:00:00 2001 From: Jeff Hagadorn Date: Thu, 30 Oct 2025 10:46:37 -0700 Subject: [PATCH 11/11] Fix oidc typo in ingress template. 
--- deploy/helm/omni/templates/ingress.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/helm/omni/templates/ingress.yaml b/deploy/helm/omni/templates/ingress.yaml index 23010838f..31f75b0f8 100644 --- a/deploy/helm/omni/templates/ingress.yaml +++ b/deploy/helm/omni/templates/ingress.yaml @@ -93,7 +93,7 @@ spec: name: internal-grpc port: number: 8080 - path: /oicd. + path: /oidc. pathType: ImplementationSpecific {{- if and .Values.ingress.api.tls .Values.ingress.api.tls.enabled }} tls: