diff --git a/full-ai-cluster/README.md b/full-ai-cluster/README.md new file mode 100644 index 0000000000..b717409cc6 --- /dev/null +++ b/full-ai-cluster/README.md @@ -0,0 +1,243 @@ +# full-ai-cluster + +End-to-end declarative AI cluster. Starts with the USB bootstrap +(identical snippet at `./usb-nixos-installer/`) and continues +through every layer up to running AI workloads. + +## What's inside + +``` +full-ai-cluster/ +├── usb-nixos-installer/ ← byte-identical copy of ../usb-nixos-installer +├── flake.nix ← cluster flake (host configs + linux-builder) +├── nixos/ +│ ├── modules/ ← shared NixOS modules +│ │ ├── common.nix +│ │ ├── k3s-server.nix ← K3S control-plane (flannel disabled for Cilium) +│ │ ├── k3s-agent.nix ← K3S worker +│ │ ├── gpu.nix ← NVIDIA drivers + container toolkit +│ │ ├── gpu-passthrough.nix ← VFIO passthrough for VM workloads +│ │ ├── gpu-device-plugin.nix ← K8s device plugin (NVIDIA/AMD/Intel) +│ │ ├── docker.nix ← Docker via NixFlake +│ │ └── local-storage.nix ← local-path-provisioner storage class +│ └── hosts/ +│ ├── control-plane/ ← configuration.nix + hardware + README +│ └── worker-gpu/ ← configuration.nix + hardware + README +└── k8s/ + ├── bootstrap/ ← K3S auto-applies on first boot (in this order) + │ ├── cilium-namespace.yaml + │ ├── cilium-install.yaml ← CNI must exist before any pods (incl. 
ArgoCD) + │ ├── argocd-namespace.yaml + │ ├── argocd-install.yaml + │ └── root-application.yaml ← App-of-Apps root + └── applications/ ← ArgoCD watches recursively + ├── cilium/ ← CNI + Hubble + KPR + BPF MASQUERADE + ├── orleans/ ← distributed cron #1 + ├── temporal/ ← distributed cron #2 (TS) + ├── dapr/ ← distributed cron #3 (actors) + ├── gitlab/ ← self-hosted Git host (option A) + ├── forgejo/ ← self-hosted Git host (option B, lighter) + ├── argo-workflows/ ← DAG job scheduler + ├── argo-rollouts/ ← progressive delivery + ├── longhorn/ ← distributed block storage + ├── cockroachdb/ ← distributed SQL + ├── hindsight/ ← agent persistent memory for Hermes (chart URL TBD) + ├── oz/ ← OpenZiti zero-trust overlay + ├── hermes/ ← custom AI agent (cloud LLMs via SOPS-baked keys, OZ transport, Hindsight memory) + ├── ollama/ ← LLM serving (option A — local — DEFERRED) + ├── vllm/ ← LLM serving (option B — high-throughput — DEFERRED) + ├── deepseek-coder/ ← model deploy → Ollama or vLLM (DEFERRED with local) + ├── qwen-coder/ ← model deploy → Ollama or vLLM (DEFERRED with local) + ├── kube-prometheus-stack/ ← Prometheus + Grafana + Alertmanager + ├── nats/ ← messaging + ├── redis/ ← cache + ├── weaviate/ ← vector DB + ├── loki/ ← logs + ├── tempo/ ← traces + ├── alloy/ ← OpenTelemetry collector + ├── mimir/ ← long-term metrics storage + ├── istio/ ← service mesh + ├── open-policy-agent/ ← admission policy + ├── sealed-secrets/ ← secrets at rest in git (option A) + └── vault/ ← runtime secrets (option B) +``` + +## Two layers, two reconcilers + +- **OS layer** is reconciled by **Nix + NixOS**. Everything in + `./nixos/` lands on a target machine via `nixos-install --flake` + (initial install) or `nixos-rebuild switch --flake` (updates). +- **Cluster layer** is reconciled by **ArgoCD**. 
K3S auto-applies + the bootstrap manifests at `./k8s/bootstrap/` on first boot + (Cilium → ArgoCD → root Application); ArgoCD then reads + `./k8s/bootstrap/root-application.yaml` (App-of-Apps) and + reconciles every workload under `./k8s/applications/` from the + same Git repo every ~3 minutes. + +This split is intentional: anything that must run BEFORE the +cluster API exists (kernel modules, CNI host setup, container +runtime, GPU drivers, K3S itself, base packages, storage class +host bits) belongs in Nix. Everything else belongs in K8s manifests +ArgoCD reconciles. + +## Bootstrap end-to-end + +### 1. Build the installer ISO (one-time, on your workstation) + +```bash +cd full-ai-cluster +nix build .#installer-iso +# Output: ./result/iso/zeta-installer-24.11.iso (~1.5-2 GB) +``` + +If you're on Apple Silicon and don't yet have the linux-builder +running, apply the nix-darwin config first: + +```bash +nix run nix-darwin/nix-darwin-24.11#darwin-rebuild -- switch --flake .#zeta-mac +``` + +### 2. Write to USB stick + +```bash +# macOS: +diskutil list +diskutil unmountDisk /dev/diskN # N = your USB device number +sudo dd if=result/iso/zeta-installer-*.iso of=/dev/rdiskN bs=4m status=progress +diskutil eject /dev/diskN + +# Linux: +lsblk +sudo dd if=result/iso/zeta-installer-*.iso of=/dev/sdX bs=4M status=progress conv=fsync +sync +``` + +(Or use Balena Etcher / Rufus for a GUI — same outcome.) + +### 3. Install on each target machine + +Boot the target on the USB stick. Then at the console: + +```bash +# Network up: +nmtui + +# Pick the target disk + partition (parted/gptfdisk/zfs all on the stick). 
+# Example minimal layout (single ext4 + EFI): +sgdisk --zap-all /dev/sda +sgdisk -n 1:0:+512M -t 1:ef00 -c 1:boot /dev/sda +sgdisk -n 2:0:0 -t 2:8300 -c 2:nixos /dev/sda +mkfs.fat -F 32 -n boot /dev/sda1 +mkfs.ext4 -L nixos /dev/sda2 +mount /dev/disk/by-label/nixos /mnt +mkdir -p /mnt/boot && mount /dev/disk/by-label/boot /mnt/boot + +# Clone the cluster flake: +git clone https://github.com/Lucent-Financial-Group/Zeta /mnt/etc/zeta + +# Generate per-machine hardware config and copy into the host dir: +nixos-generate-config --root /mnt +cp /mnt/etc/nixos/hardware-configuration.nix \ + /mnt/etc/zeta/full-ai-cluster/nixos/hosts/<host>/hardware-configuration.nix + +# Seed the K3S cluster token (control-plane only on first run): +nixos-enter --root /mnt -- bash -c ' + mkdir -p /var/lib/rancher/k3s/server + openssl rand -hex 64 > /var/lib/rancher/k3s/server/token + chmod 600 /var/lib/rancher/k3s/server/token +' +# (Copy this token to /var/lib/rancher/k3s/agent/token on every worker) + +# Install: +nixos-install --flake /mnt/etc/zeta/full-ai-cluster#<host> +# <host> = control-plane | worker-gpu | ... + +# Reboot. K3S, Cilium, ArgoCD, all workloads come up declaratively. +reboot +``` + +### 4. Verify the cluster is alive + +After the control-plane reboots: + +```bash +ssh zeta@control-plane.zeta.local +sudo kubectl get nodes +sudo kubectl -n argocd get pods +sudo kubectl -n argocd get applications +sudo cilium status +sudo cilium hubble enable --ui # if not already enabled by Helm values +``` + +### 5. Add more machines + +Repeat step 3 on each machine with the appropriate `<host>` name. +Add new `nixosConfigurations.<host>` entries to `flake.nix` as needed. 
+ +## Component status + +- ✅ Well-defined upstream charts (Cilium, ArgoCD, Temporal, GitLab, + Forgejo, Argo Workflows / Rollouts, Longhorn, CockroachDB, NATS, + Redis, Weaviate, Loki / Tempo / Alloy / Mimir, kube-prometheus-stack, + Istio, OPA, Sealed Secrets, Vault, OpenZiti) +- 🟡 Custom workloads needing maintainer input: + - **Hermes** — Aaron-built AI agent oriented at cloud LLM APIs + (Anthropic, OpenAI, etc.) with SOPS-baked keys + OZ transport + + Hindsight memory backend. Image build + push are maintainer + responsibility; the manifest scaffold + env vars are wired. + - **Orleans Silo** — custom Silo image embedding your grain code. +- ⏳ Deferred (local-models phase — wait for now per "we only care about cloud right now"): + - Ollama, vLLM, Deepseek Coder, Qwen Coder Applications stay + in the tree at `replicas: 0` so the topology is preserved. + Bump replicas + rebuild Hermes against local endpoints when + the local-models phase comes back online. +- ❓ Awaiting maintainer input: + - **Hindsight** — confirmed as standalone helm chart for agent + persistent memory for Hermes. `Application.yaml` has TODO + awaiting `repoURL` + chart name + version. + +## Secrets + +- **Sealed Secrets** — store encrypted secrets directly in Git, + decrypted by the controller at apply time. Good for low-churn + config-style secrets. +- **HashiCorp Vault** — runtime secrets injection via the Vault + Agent or external-secrets operator. Good for high-churn secrets + + rotation + audit. +- **SOPS** — file-level encryption (age/gpg/KMS); used for + Hermes-image-time secrets baked at Docker build per your spec. + +All three coexist deliberately: different secrets have different +lifetimes + access patterns. 
+## Component composition + +| Component | NixFlake or ArgoCD | Notes | +|---|---|---| +| NixOS + bootloader | Nix | USB installer | +| K3S | Nix (per-host module) | flannel + servicelb disabled (Cilium takes over) | +| Cilium | ArgoCD | KPR, Hubble Relay + UI, BPF MASQUERADE enabled | +| Docker | Nix (per-host module) | for non-K8s container workloads | +| Local-path storage | Nix (per-host module) | host-path PV for stateless workloads | +| GPU drivers (NVIDIA) | Nix (per-host module) | proprietary driver, container toolkit | +| GPU passthrough (VFIO) | Nix (per-host module) | for VM workloads on the same hosts | +| GPU device plugin (K8s) | Nix (per-host module) | exposes `nvidia.com/gpu`, `amd.com/gpu`, `intel.com/gpu` to pods | +| Everything else | ArgoCD | reconciled from `k8s/applications/` | + +The Cilium choice DISPLACES K3S's default flannel CNI. The +control-plane's `k3s-server.nix` passes `--flannel-backend=none` +and `--disable-network-policy` to K3S; Cilium owns CNI, kube-proxy +replacement, and network policy. + +## Updating the cluster + +- **OS layer** changes: edit the relevant file under `./nixos/`, + commit, push. Then on each target: + `sudo nixos-rebuild switch --flake /etc/zeta/full-ai-cluster#<host>` +- **Cluster layer** changes: edit the relevant `Application.yaml` + or referenced manifest, commit, push. ArgoCD reconciles within + ~3 minutes. + +For a full cluster rebuild from scratch: this directory IS the +desired state. Wipe everything, rerun the bootstrap, end up at +the same place. diff --git a/full-ai-cluster/flake.nix b/full-ai-cluster/flake.nix new file mode 100644 index 0000000000..d388097e6f --- /dev/null +++ b/full-ai-cluster/flake.nix @@ -0,0 +1,167 @@ +# full-ai-cluster/flake.nix +# +# End-to-end declarative AI cluster flake. +# +# The USB installer (./usb-nixos-installer/) is the snippet at the +# start of this directory. 
After installation completes, K3S auto- +# applies the bootstrap manifests at ./k8s/bootstrap/ which install +# ArgoCD. ArgoCD then reconciles every other workload from +# ./k8s/applications/. +# +# Bootstrap flow: +# 1. Build USB: nix build .#installer-iso +# 2. Write to USB: sudo dd if=result/iso/*.iso of=/dev/sdX bs=4M +# 3. Boot target on USB +# 4. Clone Zeta + nixos-install --flake .# +# 5. Reboot. K3S + Cilium + ArgoCD + everything come up declaratively. + +{ + description = "Zeta full AI cluster — declarative from USB to running workloads"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11"; + nixos-hardware.url = "github:NixOS/nixos-hardware/master"; + flake-utils.url = "github:numtide/flake-utils"; + + # nix-darwin pinned to matching release branch so Apple Silicon + # maintainers can build the x86_64-linux ISO via the linux-builder + # VM (Virtualization.framework + Rosetta 2). + nix-darwin = { + url = "github:nix-darwin/nix-darwin/nix-darwin-24.11"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + }; + + outputs = { self, nixpkgs, nixos-hardware, flake-utils, nix-darwin, ... }@inputs: + let + stateVersion = "24.11"; + + supportedSystems = [ + "x86_64-linux" + "aarch64-linux" + "aarch64-darwin" + ]; + + isoBuildSystems = [ + "x86_64-linux" + "aarch64-darwin" + ]; + + mkSystem = { system ? "x86_64-linux", modules }: nixpkgs.lib.nixosSystem { + inherit system; + specialArgs = { inherit inputs stateVersion; }; + modules = modules; + }; + in + { + # NixOS configurations: installer image + per-host targets. + nixosConfigurations = { + # USB installer ISO — identical to the standalone + # usb-nixos-installer/ flake at the parent level. + installer = mkSystem { + modules = [ + ./usb-nixos-installer/nixos/installer/configuration.nix + ]; + }; + + # Control-plane: K3S server + Cilium CNI + ArgoCD bootstrap. + control-plane = mkSystem { + modules = [ + ./nixos/hosts/control-plane/configuration.nix + ]; + }; + + # GPU worker template. 
Duplicate this entry per physical worker + # (worker-gpu-01, worker-gpu-02, ...) once hardware-configuration + # files for each are committed. + worker-gpu = mkSystem { + modules = [ + ./nixos/hosts/worker-gpu/configuration.nix + ]; + }; + }; + + # Shared NixOS modules — per-host configs import these via + # relative path; this output exposes them so external flakes + # can reuse the same modules. + nixosModules = { + common = ./nixos/modules/common.nix; + k3s-server = ./nixos/modules/k3s-server.nix; + k3s-agent = ./nixos/modules/k3s-agent.nix; + gpu = ./nixos/modules/gpu.nix; + gpu-passthrough = ./nixos/modules/gpu-passthrough.nix; + gpu-device-plugin = ./nixos/modules/gpu-device-plugin.nix; + docker = ./nixos/modules/docker.nix; + local-storage = ./nixos/modules/local-storage.nix; + }; + + # nix-darwin config for maintainer Macs (Apple Silicon). Enables + # the linux-builder VM so `nix build .#installer-iso` works + # locally without Parallels / Lima / remote builders. + darwinConfigurations.zeta-mac = nix-darwin.lib.darwinSystem { + system = "aarch64-darwin"; + specialArgs = { inherit inputs; }; + modules = [ + ({ pkgs, lib, ... 
}: { + nix.settings = { + experimental-features = [ "nix-command" "flakes" ]; + trusted-users = [ "@admin" ]; + extra-platforms = [ "x86_64-linux" ]; + }; + nix.linux-builder = { + enable = true; + ephemeral = false; + maxJobs = 4; + supportedFeatures = [ "kvm" "benchmark" "big-parallel" ]; + config = { + virtualisation = { + darwin-builder = { + diskSize = 40 * 1024; + memorySize = 8 * 1024; + }; + cores = 6; + }; + }; + }; + environment.systemPackages = with pkgs; [ + git gh jq yq-go ripgrep fd htop + kubectl kubernetes-helm k9s argocd + age sops ssh-to-age + nix-output-monitor nvd nh + ]; + system.stateVersion = 5; + nixpkgs.hostPlatform = "aarch64-darwin"; + }) + ]; + }; + } // flake-utils.lib.eachSystem supportedSystems (system: + let + pkgs = import nixpkgs { inherit system; }; + in + { + packages = nixpkgs.lib.optionalAttrs (builtins.elem system isoBuildSystems) { + installer-iso = + self.nixosConfigurations.installer.config.system.build.isoImage; + default = self.packages.${system}.installer-iso; + }; + + devShells.default = pkgs.mkShell { + name = "zeta-ai-cluster-admin"; + packages = with pkgs; [ + nix-output-monitor nvd nh + kubectl kubernetes-helm k9s argocd + cilium-cli hubble + age sops ssh-to-age + git gh jq yq-go ripgrep fd + ]; + shellHook = '' + echo "zeta-ai-cluster admin shell." + echo " Build USB ISO: nix build .#installer-iso" + echo " Build host system: nixos-rebuild build --flake .#" + echo " Talk to cluster: kubectl / k9s / argocd / cilium / hubble" + ''; + }; + + formatter = pkgs.nixpkgs-fmt; + }); +} diff --git a/full-ai-cluster/k8s/applications/alloy/Application.yaml b/full-ai-cluster/k8s/applications/alloy/Application.yaml new file mode 100644 index 0000000000..54f28525fa --- /dev/null +++ b/full-ai-cluster/k8s/applications/alloy/Application.yaml @@ -0,0 +1,46 @@ +# Alloy — Grafana's OpenTelemetry collector. Ships logs to Loki, +# metrics to Mimir/Prometheus, traces to Tempo. 
+ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: alloy + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://grafana.github.io/helm-charts + chart: alloy + targetRevision: 0.10.1 + helm: + releaseName: alloy + valuesObject: + controller: + type: daemonset # collect from every node + alloy: + configMap: + create: true + content: | + logging { + level = "info" + format = "logfmt" + } + // Logs -> Loki + loki.write "loki" { + endpoint { url = "http://loki.loki.svc.cluster.local:3100/loki/api/v1/push" } + } + // Traces -> Tempo + otelcol.exporter.otlp "tempo" { + client { endpoint = "http://tempo.tempo.svc.cluster.local:4317" } + } + // Metrics -> Mimir (or fall back to local Prometheus) + prometheus.remote_write "mimir" { + endpoint { url = "http://mimir-distributor.mimir.svc.cluster.local:8080/api/v1/push" } + } + destination: + server: https://kubernetes.default.svc + namespace: alloy + syncPolicy: + automated: { prune: true, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/argo-rollouts/Application.yaml b/full-ai-cluster/k8s/applications/argo-rollouts/Application.yaml new file mode 100644 index 0000000000..98205a6887 --- /dev/null +++ b/full-ai-cluster/k8s/applications/argo-rollouts/Application.yaml @@ -0,0 +1,30 @@ +# Argo Rollouts — progressive delivery (canary, blue-green). 
+ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: argo-rollouts + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://argoproj.github.io/argo-helm + chart: argo-rollouts + targetRevision: 2.39.5 + helm: + releaseName: argo-rollouts + valuesObject: + controller: + replicas: 1 + dashboard: + enabled: true + serviceType: ClusterIP + notifications: + enabled: true + destination: + server: https://kubernetes.default.svc + namespace: argo-rollouts + syncPolicy: + automated: { prune: true, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/argo-workflows/Application.yaml b/full-ai-cluster/k8s/applications/argo-workflows/Application.yaml new file mode 100644 index 0000000000..9f19ab65a9 --- /dev/null +++ b/full-ai-cluster/k8s/applications/argo-workflows/Application.yaml @@ -0,0 +1,34 @@ +# Argo Workflows — DAG job scheduler. Drives AI pipelines + batch jobs. 
+ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: argo-workflows + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://argoproj.github.io/argo-helm + chart: argo-workflows + targetRevision: 0.42.5 + helm: + releaseName: argo-workflows + valuesObject: + controller: + workflowDefaults: + spec: + activeDeadlineSeconds: 86400 + ttlStrategy: { secondsAfterCompletion: 604800 } + podGC: { strategy: OnPodCompletion } + parallelism: 50 + server: + authModes: [ server ] + executor: + image: { tag: "" } + destination: + server: https://kubernetes.default.svc + namespace: argo-workflows + syncPolicy: + automated: { prune: true, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/cilium/Application.yaml b/full-ai-cluster/k8s/applications/cilium/Application.yaml new file mode 100644 index 0000000000..1a008a576f --- /dev/null +++ b/full-ai-cluster/k8s/applications/cilium/Application.yaml @@ -0,0 +1,69 @@ +# Cilium — CNI + KPR + Hubble + BPF MASQUERADE. +# Replaces K3S's default flannel + kube-proxy (disabled in k3s-server.nix). + +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: cilium + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: https://helm.cilium.io/ + chart: cilium + targetRevision: 1.16.5 + helm: + releaseName: cilium + valuesObject: + kubeProxyReplacement: true + k8sServiceHost: control-plane.zeta.local + k8sServicePort: 6443 + ipam: + mode: cluster-pool + operator: + clusterPoolIPv4PodCIDRList: [ "10.42.0.0/16" ] + + # BPF MASQUERADE (required by the spec). + bpf: + masquerade: true + + # Routing: native routing is faster than VXLAN encapsulation + # when the underlying network supports it. 
+ routingMode: native + ipv4NativeRoutingCIDR: "10.42.0.0/16" + autoDirectNodeRoutes: true + + # Hubble (observability + Hubble Relay + Hubble UI). + hubble: + enabled: true + relay: + enabled: true + ui: + enabled: true + metrics: + enabled: + - dns + - drop + - tcp + - flow + - icmp + - http + enableOpenMetrics: true + serviceMonitor: + enabled: false # enabled once kube-prometheus-stack lands + + operator: + replicas: 1 # bump to 2 once a second control-plane exists + + destination: + server: https://kubernetes.default.svc + namespace: kube-system + syncPolicy: + automated: + prune: false # never prune Cilium — too load-bearing + selfHeal: true + syncOptions: + - CreateNamespace=true + - ServerSideApply=true diff --git a/full-ai-cluster/k8s/applications/cockroachdb/Application.yaml b/full-ai-cluster/k8s/applications/cockroachdb/Application.yaml new file mode 100644 index 0000000000..53f3f65f86 --- /dev/null +++ b/full-ai-cluster/k8s/applications/cockroachdb/Application.yaml @@ -0,0 +1,43 @@ +# CockroachDB — distributed SQL. Backs Temporal + GitLab + Forgejo +# + any workload wanting strong consistency + horizontal scale. 
+ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: cockroachdb + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://charts.cockroachdb.com/ + chart: cockroachdb + targetRevision: 14.0.5 + helm: + releaseName: cockroachdb + valuesObject: + statefulset: + replicas: 3 + storage: + persistentVolume: + enabled: true + size: 100Gi + storageClass: longhorn + conf: + single-node: false + cache: 25% + max-sql-memory: 25% + tls: + enabled: true + certs: + selfSigner: + enabled: true + caProvided: false + prometheus: + enabled: true # for kube-prometheus-stack scrape + destination: + server: https://kubernetes.default.svc + namespace: cockroachdb + syncPolicy: + automated: { prune: false, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/dapr/Application.yaml b/full-ai-cluster/k8s/applications/dapr/Application.yaml new file mode 100644 index 0000000000..4bfd9f9efd --- /dev/null +++ b/full-ai-cluster/k8s/applications/dapr/Application.yaml @@ -0,0 +1,32 @@ +# Dapr — distributed actor runtime #3. + +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: dapr + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://dapr.github.io/helm-charts/ + chart: dapr + targetRevision: 1.14.4 + helm: + releaseName: dapr + valuesObject: + global: + ha: { enabled: false } # bump once multi-control-plane + dapr_placement: + replicaCount: 1 + # Actors persistence: configure a Redis/CockroachDB state + # store via a Dapr Component once those land. 
Example: + # configuration: + # metricSpec: + # enabled: true + destination: + server: https://kubernetes.default.svc + namespace: dapr-system + syncPolicy: + automated: { prune: true, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/deepseek-coder/Application.yaml b/full-ai-cluster/k8s/applications/deepseek-coder/Application.yaml new file mode 100644 index 0000000000..a033fbed35 --- /dev/null +++ b/full-ai-cluster/k8s/applications/deepseek-coder/Application.yaml @@ -0,0 +1,28 @@ +# Deepseek Coder — model deploy via Ollama OR vLLM. +# +# This Application is structural — the model itself is pulled by +# whichever serving stack you chose (Ollama via the `models.pull:` +# in its Helm values, OR vLLM via its `--model` arg). This +# Application just declares the namespace + an info ConfigMap so +# operators can grep for the model deploy via the same tooling. + +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: deepseek-coder + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://github.com/Lucent-Financial-Group/Zeta + targetRevision: main + path: full-ai-cluster/k8s/applications/deepseek-coder + directory: + include: '{namespace,configmap}.yaml' + destination: + server: https://kubernetes.default.svc + namespace: models + syncPolicy: + automated: { prune: true, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/deepseek-coder/configmap.yaml b/full-ai-cluster/k8s/applications/deepseek-coder/configmap.yaml new file mode 100644 index 0000000000..bae1b3a391 --- /dev/null +++ b/full-ai-cluster/k8s/applications/deepseek-coder/configmap.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: models + labels: { app.kubernetes.io/part-of: zeta-ai } +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: 
deepseek-coder + namespace: models +data: + model: "deepseek-coder:33b" + served-by: "ollama|vllm" # set to whichever you enabled + endpoint-ollama: "http://ollama.ollama.svc.cluster.local:11434" + endpoint-vllm: "http://vllm.vllm.svc.cluster.local:8000" + size-vram-gb: "24" # rough VRAM target for q4 quant + license: "deepseek-license-v1" diff --git a/full-ai-cluster/k8s/applications/forgejo/Application.yaml b/full-ai-cluster/k8s/applications/forgejo/Application.yaml new file mode 100644 index 0000000000..ff06180de1 --- /dev/null +++ b/full-ai-cluster/k8s/applications/forgejo/Application.yaml @@ -0,0 +1,39 @@ +# Forgejo — self-hosted Git. Lighter than GitLab; pick one. +# +# DEFAULT: NOT reconciled. The repo ships both gitlab/ and forgejo/ +# Application definitions for review; only one should run at a time. +# GitLab is the default-on; Forgejo is manual-sync. To switch: +# 1. Move `automated` block from gitlab/Application.yaml here +# 2. Remove `automated` from gitlab/Application.yaml +# 3. argocd app delete gitlab + argocd app sync forgejo + +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: forgejo + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://code.forgejo.org/forgejo-helm/ + chart: forgejo + targetRevision: 9.0.6 + helm: + releaseName: forgejo + valuesObject: + gitea: + admin: + existingSecret: forgejo-initial-admin + persistence: + enabled: true + size: 20Gi + storageClass: zeta-local-path # or longhorn for HA + postgresql: + enabled: true # ships its own postgres; swap to CockroachDB later + destination: + server: https://kubernetes.default.svc + namespace: forgejo + # Manual sync only by default — see header comment for swap procedure. 
+ syncPolicy: + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/gitlab/Application.yaml b/full-ai-cluster/k8s/applications/gitlab/Application.yaml new file mode 100644 index 0000000000..957b1fb820 --- /dev/null +++ b/full-ai-cluster/k8s/applications/gitlab/Application.yaml @@ -0,0 +1,39 @@ +# GitLab CE — self-hosted Git. Heavier than Forgejo; pick one. + +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: gitlab + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://charts.gitlab.io/ + chart: gitlab + targetRevision: 8.7.0 + helm: + releaseName: gitlab + valuesObject: + global: + edition: ce + hosts: + domain: gitlab.zeta.local + ingress: { tls: { enabled: false } } # turn on after cert-manager + initialRootPassword: + secret: gitlab-initial-root-password + key: password + # Disable bundled components — cluster has its own. + certmanager: { install: false } + nginx-ingress: { enabled: false } + prometheus: { install: false } + gitlab-runner: + install: true + runners: { privileged: false, tags: "kubernetes,zeta-cluster" } + destination: + server: https://kubernetes.default.svc + namespace: gitlab + syncPolicy: + automated: { prune: false, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true, SkipDryRunOnMissingResource=true ] + retry: { limit: 5, backoff: { duration: 30s, factor: 2, maxDuration: 5m } } diff --git a/full-ai-cluster/k8s/applications/hermes/Application.yaml b/full-ai-cluster/k8s/applications/hermes/Application.yaml new file mode 100644 index 0000000000..5cd833eb9e --- /dev/null +++ b/full-ai-cluster/k8s/applications/hermes/Application.yaml @@ -0,0 +1,37 @@ +# Hermes — TODO: AMBIGUOUS COMPONENT. 
+# +# Possibilities: +# - Cosmos Hermes IBC relayer (https://github.com/informalsystems/hermes) +# - Comma.ai Hermes +# - Hermes message broker (multiple projects with this name) +# - An Aaron-specific Hermes (AI agent, terminal-tooling, etc.) +# +# Per spec: "integrated with OZ" + "SOPS into Hermes Docker image" + +# "Hermes access to Ollama or vLLM" — these hint at an AI-agent +# Hermes that talks to OZ + needs Ollama/vLLM access + has secrets +# baked at image-build via SOPS. +# +# This Application points at local manifests so you can drop the +# real image + supporting config without changing the Application +# itself. + +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: hermes + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://github.com/Lucent-Financial-Group/Zeta + targetRevision: main + path: full-ai-cluster/k8s/applications/hermes + directory: + include: '{namespace,deployment,service,rbac}.yaml' + destination: + server: https://kubernetes.default.svc + namespace: hermes + syncPolicy: + automated: { prune: true, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/hermes/deployment.yaml b/full-ai-cluster/k8s/applications/hermes/deployment.yaml new file mode 100644 index 0000000000..41f995723e --- /dev/null +++ b/full-ai-cluster/k8s/applications/hermes/deployment.yaml @@ -0,0 +1,80 @@ +# Hermes — custom AI agent oriented at CLOUD LLM endpoints. +# +# The image at `ghcr.io/lucent-financial-group/zeta-hermes` is built +# via the Docker module (NixFlake) on a maintainer host. SOPS-decrypted +# CLOUD API KEYS are baked into the image at build time so the running +# container doesn't need access to the SOPS keys. +# +# Build pipeline: +# 1. sops -d encrypted/cloud-keys.env > secrets/cloud-keys.env +# 2. docker buildx build --secret id=hermes-secrets,src=secrets/ ... +# 3. 
docker push ghcr.io/lucent-financial-group/zeta-hermes:vN.N.N +# 4. Bump `image:` below + commit + push +# +# Hermes' connections (cloud-only for now; local models deferred): +# - OpenZiti (OZ) for zero-trust transport: +# ziti-controller.openziti.svc.cluster.local:443 +# - Cloud LLM APIs via baked-in keys: +# Anthropic Claude +# OpenAI +# (add more as the SOPS file grows) +# - Hindsight (memory plugin) — once its Application lands, point at +# hindsight.hindsight.svc.cluster.local +# +# Local LLM serving (Ollama/vLLM) endpoints are kept in commented-out +# form below for when local models come back online. + +apiVersion: v1 +kind: Namespace +metadata: + name: hermes +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: hermes + namespace: hermes +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: hermes + namespace: hermes +spec: + replicas: 0 # set >=1 once a real image exists + selector: + matchLabels: { app.kubernetes.io/name: hermes } + template: + metadata: + labels: { app.kubernetes.io/name: hermes } + spec: + serviceAccountName: hermes + containers: + - name: hermes + image: ghcr.io/lucent-financial-group/zeta-hermes:placeholder + env: + # OpenZiti transport + - { name: OZ_CONTROLLER_URL, + value: "https://ziti-controller.openziti.svc.cluster.local:443" } + # Cloud LLM providers — API keys baked in at image build via SOPS + - { name: LLM_PROVIDER, value: "anthropic" } # or "openai" / "bedrock" + # Hindsight memory backend (Application lands separately) + - { name: HINDSIGHT_URL, + value: "http://hindsight.hindsight.svc.cluster.local" } + # Local LLM endpoints — kept commented for when local models are re-enabled: + # - { name: OLLAMA_ENDPOINT, value: "http://ollama.ollama.svc.cluster.local:11434" } + # - { name: VLLM_ENDPOINT, value: "http://vllm.vllm.svc.cluster.local:8000" } + resources: + requests: { cpu: "200m", memory: "256Mi" } + limits: { cpu: "1", memory: "1Gi" } +--- +apiVersion: v1 +kind: Service +metadata: + name: hermes + 
namespace: hermes +spec: + type: ClusterIP + selector: { app.kubernetes.io/name: hermes } + ports: + - { name: http, port: 80, targetPort: 8080 } diff --git a/full-ai-cluster/k8s/applications/hindsight/Application.yaml b/full-ai-cluster/k8s/applications/hindsight/Application.yaml new file mode 100644 index 0000000000..defa46109a --- /dev/null +++ b/full-ai-cluster/k8s/applications/hindsight/Application.yaml @@ -0,0 +1,46 @@ +# Hindsight — agent persistent memory system for Hermes. +# Standalone Helm chart deployed via ArgoCD. +# +# TODO(maintainer): provide the Helm chart URL + chart name + version. +# Confirm which Hindsight chart this refers to: +# - public OSS chart (helm repo URL) +# - private chart (repoURL + auth) +# - in-repo chart (sibling repo URL + path) +# +# Once repoURL + chart name + version are provided, this Application +# wires up directly. Until then, this placeholder declares the +# namespace + intent so the structure is in place. + +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: hindsight + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + # TODO(maintainer): replace with the real Helm repo + chart name. 
+ # Example shape: + # repoURL: https://your-org.github.io/hindsight-chart/ + # chart: hindsight + # targetRevision: 1.0.0 + # helm: + # releaseName: hindsight + # valuesObject: + # persistence: + # storageClass: longhorn + # size: 20Gi + # hermesIntegration: + # enabled: true + repoURL: https://github.com/Lucent-Financial-Group/Zeta + targetRevision: main + path: full-ai-cluster/k8s/applications/hindsight + directory: + include: 'namespace.yaml' + destination: + server: https://kubernetes.default.svc + namespace: hindsight + syncPolicy: + automated: { prune: false, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/hindsight/namespace.yaml b/full-ai-cluster/k8s/applications/hindsight/namespace.yaml new file mode 100644 index 0000000000..25ba85ad9a --- /dev/null +++ b/full-ai-cluster/k8s/applications/hindsight/namespace.yaml @@ -0,0 +1,10 @@ +# Namespace placeholder. Replaced by real Hindsight manifests +# once the chart URL is provided. + +apiVersion: v1 +kind: Namespace +metadata: + name: hindsight + labels: + app.kubernetes.io/part-of: zeta + zeta.io/integrates-with: hermes diff --git a/full-ai-cluster/k8s/applications/istio/Application.yaml b/full-ai-cluster/k8s/applications/istio/Application.yaml new file mode 100644 index 0000000000..9f25a77fc1 --- /dev/null +++ b/full-ai-cluster/k8s/applications/istio/Application.yaml @@ -0,0 +1,31 @@ +# Istio — service mesh. Sidecar OR ambient mode (ambient = no +# sidecar per pod; lighter). Cilium handles low-level networking; +# Istio handles L7 routing / auth / mTLS. 
+ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: istio + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://istio-release.storage.googleapis.com/charts + chart: base + targetRevision: 1.24.0 + helm: + releaseName: istio-base + valuesObject: + defaultRevision: default + destination: + server: https://kubernetes.default.svc + namespace: istio-system + syncPolicy: + automated: { prune: false, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] +# NOTE: Istio actually needs THREE Helm releases to be useful +# (base, istiod, gateway). This Application installs the CRDs (base); +# add istio-istiod/Application.yaml + istio-gateway/Application.yaml +# in follow-up dirs once base is healthy. Splitting the apps lets +# ArgoCD sync them in the right order. diff --git a/full-ai-cluster/k8s/applications/kube-prometheus-stack/Application.yaml b/full-ai-cluster/k8s/applications/kube-prometheus-stack/Application.yaml new file mode 100644 index 0000000000..c4440bae66 --- /dev/null +++ b/full-ai-cluster/k8s/applications/kube-prometheus-stack/Application.yaml @@ -0,0 +1,56 @@ +# Prometheus + Grafana + Alertmanager (kube-prometheus-stack). + +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: kube-prometheus-stack + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://prometheus-community.github.io/helm-charts + chart: kube-prometheus-stack + targetRevision: 65.5.0 + helm: + releaseName: kube-prometheus-stack + valuesObject: + prometheus: + prometheusSpec: + retention: 15d + storageSpec: + volumeClaimTemplate: + spec: + storageClassName: longhorn + accessModes: [ ReadWriteOnce ] + resources: { requests: { storage: 100Gi } } + grafana: + # Admin password sourced from a Secret rather than + # hardcoded. 
Create the secret BEFORE this app syncs: + # kubectl -n monitoring create secret generic grafana-admin-credentials \ + # --from-literal=admin-user=admin \ + # --from-literal=admin-password="$(openssl rand -hex 16)" + # Or use a Sealed Secret committed to Git so the credential + # is reproducible from the cluster definition. + admin: + existingSecret: grafana-admin-credentials + userKey: admin-user + passwordKey: admin-password + persistence: + enabled: true + storageClassName: longhorn + size: 10Gi + alertmanager: + alertmanagerSpec: + storage: + volumeClaimTemplate: + spec: + storageClassName: longhorn + accessModes: [ ReadWriteOnce ] + resources: { requests: { storage: 10Gi } } + destination: + server: https://kubernetes.default.svc + namespace: monitoring + syncPolicy: + automated: { prune: false, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/loki/Application.yaml b/full-ai-cluster/k8s/applications/loki/Application.yaml new file mode 100644 index 0000000000..7ee2b2b375 --- /dev/null +++ b/full-ai-cluster/k8s/applications/loki/Application.yaml @@ -0,0 +1,45 @@ +# Loki — log aggregation. + +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: loki + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://grafana.github.io/helm-charts + chart: loki + targetRevision: 6.18.0 + helm: + releaseName: loki + valuesObject: + deploymentMode: SimpleScalable + loki: + auth_enabled: false + schemaConfig: + configs: + - from: "2024-01-01" + store: tsdb + object_store: s3 + schema: v13 + index: { prefix: loki_index_, period: 24h } + storage: + type: s3 + # TODO: point at an in-cluster S3 (MinIO, SeaweedFS) or + # external S3-compatible storage. For now only the bucket + # names are set; wire the endpoint + credentials after + # picking an object-storage backend.
+ bucketNames: { chunks: loki-chunks, ruler: loki-ruler } + write: { replicas: 2 } + read: { replicas: 2 } + backend: { replicas: 2 } + chunksCache: { enabled: true } + resultsCache: { enabled: true } + destination: + server: https://kubernetes.default.svc + namespace: loki + syncPolicy: + automated: { prune: false, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/longhorn/Application.yaml b/full-ai-cluster/k8s/applications/longhorn/Application.yaml new file mode 100644 index 0000000000..bdbce9cf78 --- /dev/null +++ b/full-ai-cluster/k8s/applications/longhorn/Application.yaml @@ -0,0 +1,34 @@ +# Longhorn — distributed block storage. Stateful workloads land here +# (Postgres for GitLab, CockroachDB data dirs, Weaviate, etc.). +# Local-path storage (NixFlake module) covers stateless workloads. + +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: longhorn + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://charts.longhorn.io + chart: longhorn + targetRevision: 1.7.2 + helm: + releaseName: longhorn + valuesObject: + defaultSettings: + defaultDataPath: /var/lib/longhorn + defaultReplicaCount: 2 # bump to 3 once 3+ workers exist + persistence: + defaultClass: false + defaultClassReplicaCount: 2 + reclaimPolicy: Retain + ingress: + enabled: false # turn on after cert-manager + ingress-nginx + destination: + server: https://kubernetes.default.svc + namespace: longhorn-system + syncPolicy: + automated: { prune: false, selfHeal: true } # never prune storage + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/mimir/Application.yaml b/full-ai-cluster/k8s/applications/mimir/Application.yaml new file mode 100644 index 0000000000..312d1acdb0 --- /dev/null +++ b/full-ai-cluster/k8s/applications/mimir/Application.yaml @@ -0,0 +1,46 @@ +# Mimir — 
long-term metrics storage. Complements kube-prometheus-stack +# which keeps recent metrics; Mimir holds the long tail for trend +# analysis. + +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: mimir + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://grafana.github.io/helm-charts + chart: mimir-distributed + targetRevision: 5.5.1 + helm: + releaseName: mimir + valuesObject: + metaMonitoring: + serviceMonitor: { enabled: false } + # MinIO bundled for object storage. Swap to external S3 + # once you stand up real object storage. + minio: + enabled: true + persistence: + storageClass: longhorn + size: 100Gi + ingester: + persistentVolume: + storageClass: longhorn + size: 50Gi + store_gateway: + persistentVolume: + storageClass: longhorn + size: 50Gi + compactor: + persistentVolume: + storageClass: longhorn + size: 50Gi + destination: + server: https://kubernetes.default.svc + namespace: mimir + syncPolicy: + automated: { prune: false, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/nats/Application.yaml b/full-ai-cluster/k8s/applications/nats/Application.yaml new file mode 100644 index 0000000000..c2102a6b1f --- /dev/null +++ b/full-ai-cluster/k8s/applications/nats/Application.yaml @@ -0,0 +1,35 @@ +# NATS — messaging. 
+ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: nats + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://nats-io.github.io/k8s/helm/charts/ + chart: nats + targetRevision: 1.2.7 + helm: + releaseName: nats + valuesObject: + config: + jetstream: + enabled: true + fileStore: + pvc: + size: 20Gi + storageClassName: longhorn + cluster: + enabled: true + replicas: 3 + natsBox: + enabled: true + destination: + server: https://kubernetes.default.svc + namespace: nats + syncPolicy: + automated: { prune: false, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/ollama/Application.yaml b/full-ai-cluster/k8s/applications/ollama/Application.yaml new file mode 100644 index 0000000000..61ba170ada --- /dev/null +++ b/full-ai-cluster/k8s/applications/ollama/Application.yaml @@ -0,0 +1,63 @@ +# Ollama — LLM serving (option A). +# Schedules onto worker-gpu nodes via nvidia.com/gpu request. +# +# DEFERRED: local-models phase is on hold per "we only care about +# cloud right now." The Application is shipped in scaled-to-zero +# form so the topology is preserved + the chart pin is reviewable, +# but NO pods schedule + NO models pull until a maintainer: +# 1. Removes the `replicaCount: 0` override below +# 2. Re-enables automated sync (currently set to manual) +# 3. 
Adds models back to `ollama.models.pull` + `.run` + +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: ollama + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://otwld.github.io/ollama-helm/ + chart: ollama + targetRevision: 1.6.0 + helm: + releaseName: ollama + valuesObject: + replicaCount: 0 # ← deferred local-models phase + ollama: + gpu: + enabled: true + type: nvidia + number: 1 + models: + # Pull + run lists intentionally empty — no model pull + # at deploy time. Re-enable when the local phase comes + # back online (example values left as comments): + pull: [ ] + # - deepseek-coder:33b + # - qwen2.5-coder:32b + run: [ ] + # - deepseek-coder:33b + # - qwen2.5-coder:32b + persistentVolume: + enabled: true + size: 200Gi + storageClass: longhorn + nodeSelector: + zeta.io/gpu: nvidia + resources: + requests: { cpu: "2", memory: "8Gi", "nvidia.com/gpu": 1 } + limits: { cpu: "8", memory: "32Gi", "nvidia.com/gpu": 1 } + service: + type: ClusterIP + port: 11434 + destination: + server: https://kubernetes.default.svc + namespace: ollama + # syncPolicy without `automated` — manual-sync-only so ArgoCD + # doesn't reconcile this app on its own during the deferred + # phase. Maintainer triggers via `argocd app sync ollama` when + # ready to enable local serving. + syncPolicy: + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/open-policy-agent/Application.yaml b/full-ai-cluster/k8s/applications/open-policy-agent/Application.yaml new file mode 100644 index 0000000000..5e6cecd10c --- /dev/null +++ b/full-ai-cluster/k8s/applications/open-policy-agent/Application.yaml @@ -0,0 +1,28 @@ +# Open Policy Agent (Gatekeeper) — admission policy. 
+ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: open-policy-agent + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://open-policy-agent.github.io/gatekeeper/charts + chart: gatekeeper + targetRevision: 3.18.1 + helm: + releaseName: gatekeeper + valuesObject: + replicas: 3 + controllerManager: + metricsPort: 8888 + validatingWebhookFailurePolicy: Fail + mutatingWebhookFailurePolicy: Ignore # safer default while iterating + destination: + server: https://kubernetes.default.svc + namespace: gatekeeper-system + syncPolicy: + automated: { prune: false, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/orleans/Application.yaml b/full-ai-cluster/k8s/applications/orleans/Application.yaml new file mode 100644 index 0000000000..58613554b6 --- /dev/null +++ b/full-ai-cluster/k8s/applications/orleans/Application.yaml @@ -0,0 +1,33 @@ +# Orleans — distributed actor runtime / "distributed cron" #1. +# +# Orleans doesn't have an official Helm chart. The pattern is to +# build a custom Silo image embedding your grain code and deploy it +# as a StatefulSet. This Application points at the manifests in +# the same directory; bump replicas to >=1 and replace the image +# once you have a published silo image. 
+ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: orleans + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: https://github.com/Lucent-Financial-Group/Zeta + targetRevision: main + path: full-ai-cluster/k8s/applications/orleans + directory: + include: '{namespace,rbac,configmap,service,statefulset}.yaml' + destination: + server: https://kubernetes.default.svc + namespace: orleans + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true + - ServerSideApply=true diff --git a/full-ai-cluster/k8s/applications/orleans/configmap.yaml b/full-ai-cluster/k8s/applications/orleans/configmap.yaml new file mode 100644 index 0000000000..6b8f808a0f --- /dev/null +++ b/full-ai-cluster/k8s/applications/orleans/configmap.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: orleans-config + namespace: orleans +data: + cluster.json: | + { + "serviceId": "zeta", + "clusterId": "zeta-prod", + "silo": { "siloPort": 11111, "gatewayPort": 30000 }, + "clustering": { "provider": "kubernetes", "namespace": "orleans" }, + "telemetry": { "dashboard": { "enabled": true, "port": 8080 } } + } diff --git a/full-ai-cluster/k8s/applications/orleans/namespace.yaml b/full-ai-cluster/k8s/applications/orleans/namespace.yaml new file mode 100644 index 0000000000..d74fa38ee5 --- /dev/null +++ b/full-ai-cluster/k8s/applications/orleans/namespace.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: orleans + labels: + app.kubernetes.io/part-of: zeta + zeta.io/distributed-cron: "true" diff --git a/full-ai-cluster/k8s/applications/orleans/rbac.yaml b/full-ai-cluster/k8s/applications/orleans/rbac.yaml new file mode 100644 index 0000000000..e2ecf6d6c0 --- /dev/null +++ b/full-ai-cluster/k8s/applications/orleans/rbac.yaml @@ -0,0 +1,35 @@ +# Orleans needs to discover sibling silos via the K8s API +# (Orleans Kubernetes clustering 
provider). + +apiVersion: v1 +kind: ServiceAccount +metadata: + name: orleans-silo + namespace: orleans +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: orleans-silo + namespace: orleans +rules: + - apiGroups: [""] + resources: ["pods", "endpoints"] + verbs: ["get", "list", "watch"] + - apiGroups: ["apps"] + resources: ["statefulsets"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: orleans-silo + namespace: orleans +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: orleans-silo +subjects: + - kind: ServiceAccount + name: orleans-silo + namespace: orleans diff --git a/full-ai-cluster/k8s/applications/orleans/service.yaml b/full-ai-cluster/k8s/applications/orleans/service.yaml new file mode 100644 index 0000000000..2a62577556 --- /dev/null +++ b/full-ai-cluster/k8s/applications/orleans/service.yaml @@ -0,0 +1,33 @@ +apiVersion: v1 +kind: Service +metadata: + name: orleans-silo + namespace: orleans +spec: + clusterIP: None + selector: { app.kubernetes.io/name: orleans-silo } + ports: + - { name: silo, port: 11111, targetPort: silo } + - { name: gateway, port: 30000, targetPort: gateway } +--- +apiVersion: v1 +kind: Service +metadata: + name: orleans-gateway + namespace: orleans +spec: + type: ClusterIP + selector: { app.kubernetes.io/name: orleans-silo } + ports: + - { name: gateway, port: 30000, targetPort: gateway } +--- +apiVersion: v1 +kind: Service +metadata: + name: orleans-dashboard + namespace: orleans +spec: + type: ClusterIP + selector: { app.kubernetes.io/name: orleans-silo } + ports: + - { name: http, port: 8080, targetPort: dashboard } diff --git a/full-ai-cluster/k8s/applications/orleans/statefulset.yaml b/full-ai-cluster/k8s/applications/orleans/statefulset.yaml new file mode 100644 index 0000000000..46fb815da5 --- /dev/null +++ b/full-ai-cluster/k8s/applications/orleans/statefulset.yaml @@ -0,0 +1,50 @@ +# Orleans Silo StatefulSet. 
Replicas start at 0 until you publish +# a real silo image. Replace `image:` and bump `replicas:` >= 1. + +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: orleans-silo + namespace: orleans +spec: + serviceName: orleans-silo + replicas: 0 + selector: + matchLabels: { app.kubernetes.io/name: orleans-silo } + podManagementPolicy: Parallel + template: + metadata: + labels: + app.kubernetes.io/name: orleans-silo + orleans.io/serviceId: zeta + orleans.io/clusterId: zeta-prod + spec: + serviceAccountName: orleans-silo + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: { app.kubernetes.io/name: orleans-silo } + containers: + - name: silo + image: ghcr.io/lucent-financial-group/zeta-orleans-silo:latest + ports: + - { name: silo, containerPort: 11111 } + - { name: gateway, containerPort: 30000 } + - { name: dashboard, containerPort: 8080 } + env: + - { name: ORLEANS_SERVICE_ID, value: zeta } + - { name: ORLEANS_CLUSTER_ID, value: zeta-prod } + - { name: POD_NAME, valueFrom: { fieldRef: { fieldPath: metadata.name } } } + - { name: POD_NAMESPACE, valueFrom: { fieldRef: { fieldPath: metadata.namespace } } } + - { name: POD_IP, valueFrom: { fieldRef: { fieldPath: status.podIP } } } + volumeMounts: + - { name: config, mountPath: /etc/orleans, readOnly: true } + resources: + requests: { cpu: "500m", memory: "512Mi" } + limits: { cpu: "2", memory: "2Gi" } + livenessProbe: { tcpSocket: { port: silo }, initialDelaySeconds: 30, periodSeconds: 15 } + readinessProbe: { tcpSocket: { port: gateway }, initialDelaySeconds: 10, periodSeconds: 5 } + volumes: + - { name: config, configMap: { name: orleans-config } } diff --git a/full-ai-cluster/k8s/applications/oz/Application.yaml b/full-ai-cluster/k8s/applications/oz/Application.yaml new file mode 100644 index 0000000000..c898fadffa --- /dev/null +++ b/full-ai-cluster/k8s/applications/oz/Application.yaml @@ -0,0 +1,51 @@ +# OZ → 
OpenZiti. Zero-trust overlay network with per-connection +# authentication + L4 mTLS. +# +# OpenZiti splits into multiple components; this Application installs +# the controller. Add ziti-router/Application.yaml in a sibling dir +# when the first edge router lands. + +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: openziti-controller + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://docs.openziti.io/helm-charts/ + chart: ziti-controller + targetRevision: 1.4.5 + helm: + releaseName: ziti-controller + valuesObject: + ctrlPlane: + service: + type: ClusterIP + clientApi: + service: + type: ClusterIP + persistence: + enabled: true + storageClass: longhorn + size: 5Gi + # Admin password sourced from a Secret rather than hardcoded. + # Create the secret BEFORE this app syncs: + # kubectl -n openziti create secret generic ziti-admin-credentials \ + # --from-literal=password="$(openssl rand -hex 24)" + # Or use a Sealed Secret committed to Git so the credential is + # reproducible from the cluster definition. + adminSecret: + name: ziti-admin-credentials + key: password + destination: + server: https://kubernetes.default.svc + namespace: openziti + syncPolicy: + automated: + prune: false # never prune — would break the trust fabric + selfHeal: true + syncOptions: + - CreateNamespace=true + - ServerSideApply=true diff --git a/full-ai-cluster/k8s/applications/qwen-coder/Application.yaml b/full-ai-cluster/k8s/applications/qwen-coder/Application.yaml new file mode 100644 index 0000000000..2968f0df9d --- /dev/null +++ b/full-ai-cluster/k8s/applications/qwen-coder/Application.yaml @@ -0,0 +1,23 @@ +# Qwen Coder — same pattern as deepseek-coder/. Served by Ollama +# or vLLM via the same endpoints. 
+ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: qwen-coder + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://github.com/Lucent-Financial-Group/Zeta + targetRevision: main + path: full-ai-cluster/k8s/applications/qwen-coder + directory: + include: '{namespace,configmap}.yaml' + destination: + server: https://kubernetes.default.svc + namespace: models + syncPolicy: + automated: { prune: true, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/qwen-coder/configmap.yaml b/full-ai-cluster/k8s/applications/qwen-coder/configmap.yaml new file mode 100644 index 0000000000..c7e4dba477 --- /dev/null +++ b/full-ai-cluster/k8s/applications/qwen-coder/configmap.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: qwen-coder + namespace: models +data: + model: "qwen2.5-coder:32b" + served-by: "ollama|vllm" + endpoint-ollama: "http://ollama.ollama.svc.cluster.local:11434" + endpoint-vllm: "http://vllm.vllm.svc.cluster.local:8000" + size-vram-gb: "24" + license: "tongyi-qianwen-license" diff --git a/full-ai-cluster/k8s/applications/redis/Application.yaml b/full-ai-cluster/k8s/applications/redis/Application.yaml new file mode 100644 index 0000000000..7c1750676f --- /dev/null +++ b/full-ai-cluster/k8s/applications/redis/Application.yaml @@ -0,0 +1,39 @@ +# Redis — cache. Bitnami chart for the standard ergonomics. 
+ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: redis + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://charts.bitnami.com/bitnami + chart: redis + targetRevision: 20.5.0 + helm: + releaseName: redis + valuesObject: + architecture: replication + auth: + enabled: true + existingSecret: redis-auth # create via Sealed Secret / Vault + existingSecretPasswordKey: password + master: + persistence: + enabled: true + storageClass: longhorn + size: 10Gi + replica: + replicaCount: 2 + persistence: + enabled: true + storageClass: longhorn + size: 10Gi + destination: + server: https://kubernetes.default.svc + namespace: redis + syncPolicy: + automated: { prune: false, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/sealed-secrets/Application.yaml b/full-ai-cluster/k8s/applications/sealed-secrets/Application.yaml new file mode 100644 index 0000000000..df31cc3a41 --- /dev/null +++ b/full-ai-cluster/k8s/applications/sealed-secrets/Application.yaml @@ -0,0 +1,26 @@ +# Sealed Secrets — encrypted secrets in Git, decrypted by the +# controller at apply time. 
+ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: sealed-secrets + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://bitnami-labs.github.io/sealed-secrets + chart: sealed-secrets + targetRevision: 2.16.2 + helm: + releaseName: sealed-secrets + valuesObject: + fullnameOverride: sealed-secrets-controller + keyrenewperiod: 720h # 30 days + destination: + server: https://kubernetes.default.svc + namespace: kube-system + syncPolicy: + automated: { prune: false, selfHeal: true } # never prune — would orphan secrets + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/tempo/Application.yaml b/full-ai-cluster/k8s/applications/tempo/Application.yaml new file mode 100644 index 0000000000..87dc33d692 --- /dev/null +++ b/full-ai-cluster/k8s/applications/tempo/Application.yaml @@ -0,0 +1,33 @@ +# Tempo — distributed traces. + +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: tempo + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://grafana.github.io/helm-charts + chart: tempo + targetRevision: 1.18.0 + helm: + releaseName: tempo + valuesObject: + tempo: + storage: + trace: + backend: local + local: + path: /var/tempo/traces + persistence: + enabled: true + storageClassName: longhorn + size: 50Gi + destination: + server: https://kubernetes.default.svc + namespace: tempo + syncPolicy: + automated: { prune: false, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/temporal/Application.yaml b/full-ai-cluster/k8s/applications/temporal/Application.yaml new file mode 100644 index 0000000000..4d16e8b6e7 --- /dev/null +++ b/full-ai-cluster/k8s/applications/temporal/Application.yaml @@ -0,0 +1,46 @@ +# Temporal (TS workers) — distributed cron #2. 
+ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: temporal + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://go.temporal.io/helm-charts + chart: temporal + targetRevision: 0.59.0 + helm: + releaseName: temporal + valuesObject: + server: + replicaCount: 1 + cassandra: + enabled: false + elasticsearch: + enabled: false + prometheus: + enabled: false + grafana: + enabled: false + # CockroachDB (via its ArgoCD app) is the persistence layer. + # Wire after cockroachdb/Application.yaml comes up: + # server: + # config: + # persistence: + # default: + # driver: sql + # sql: + # driver: postgres12 + # host: cockroachdb-public.cockroachdb.svc.cluster.local + # port: 26257 + # database: temporal + # user: root + destination: + server: https://kubernetes.default.svc + namespace: temporal + syncPolicy: + automated: { prune: true, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/vault/Application.yaml b/full-ai-cluster/k8s/applications/vault/Application.yaml new file mode 100644 index 0000000000..6a0ffa492e --- /dev/null +++ b/full-ai-cluster/k8s/applications/vault/Application.yaml @@ -0,0 +1,49 @@ +# HashiCorp Vault — runtime secrets engine + Vault Agent injector +# for pods. Pair with Sealed Secrets (../sealed-secrets/): Sealed Secrets +# for config-style secrets, Vault for dynamic + rotated secrets + audit trail.
+ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: vault + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://helm.releases.hashicorp.com + chart: vault + targetRevision: 0.29.1 + helm: + releaseName: vault + valuesObject: + global: + enabled: true + tlsDisable: false # generate TLS via cert-manager or self-signed + server: + ha: + enabled: true + replicas: 3 + raft: + enabled: true + setNodeId: true + dataStorage: + enabled: true + storageClass: longhorn + size: 20Gi + auditStorage: + enabled: true + storageClass: longhorn + size: 10Gi + injector: + enabled: true + replicas: 2 + ui: + enabled: true + serviceType: ClusterIP + destination: + server: https://kubernetes.default.svc + namespace: vault + syncPolicy: + automated: { prune: false, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/vllm/Application.yaml b/full-ai-cluster/k8s/applications/vllm/Application.yaml new file mode 100644 index 0000000000..2a82341557 --- /dev/null +++ b/full-ai-cluster/k8s/applications/vllm/Application.yaml @@ -0,0 +1,32 @@ +# vLLM — LLM serving (option B). Higher throughput than Ollama for +# production-shape inference; harder to swap models on the fly. +# Choose one of Ollama / vLLM (or run both targeting different model +# tiers — Ollama for quick interactive, vLLM for high-concurrency). +# +# No official Helm chart from the vLLM project. This Application +# deploys the hand-rolled manifests in this directory; a community +# chart (substratusai/llm-operator-style) is a possible alternative.
+ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: vllm + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://github.com/Lucent-Financial-Group/Zeta + targetRevision: main + path: full-ai-cluster/k8s/applications/vllm + directory: + include: '{namespace,deployment,service}.yaml' + destination: + server: https://kubernetes.default.svc + namespace: vllm + # DEFERRED: local-models phase on hold. The deployment.yaml ships + # replicas: 0 (already) AND syncPolicy is manual-only so ArgoCD + # doesn't reconcile until a maintainer explicitly triggers + # `argocd app sync vllm`. Mirrors the ollama Application's gating. + syncPolicy: + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/applications/vllm/deployment.yaml b/full-ai-cluster/k8s/applications/vllm/deployment.yaml new file mode 100644 index 0000000000..bb27f19fe3 --- /dev/null +++ b/full-ai-cluster/k8s/applications/vllm/deployment.yaml @@ -0,0 +1,69 @@ +# vLLM serving deployment. Replicas start at 0 — choose a model +# (image tag) and bump to 1+ once you've picked between Ollama and +# vLLM (or which models each one serves). 
+ +apiVersion: v1 +kind: Namespace +metadata: + name: vllm +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vllm + namespace: vllm +spec: + replicas: 0 + selector: + matchLabels: { app.kubernetes.io/name: vllm } + template: + metadata: + labels: { app.kubernetes.io/name: vllm } + spec: + nodeSelector: + zeta.io/gpu: nvidia + containers: + - name: vllm + image: vllm/vllm-openai:latest + args: + # Defaults — override per model by tweaking these args: + - --model + - deepseek-ai/deepseek-coder-33b-instruct + - --tensor-parallel-size + - "1" + - --host + - "0.0.0.0" + - --port + - "8000" + ports: + - { containerPort: 8000, name: http } + resources: + requests: { cpu: "4", memory: "16Gi", "nvidia.com/gpu": 1 } + limits: { cpu: "8", memory: "64Gi", "nvidia.com/gpu": 1 } + volumeMounts: + - { name: cache, mountPath: /root/.cache/huggingface } + volumes: + - name: cache + persistentVolumeClaim: { claimName: vllm-cache } +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: vllm-cache + namespace: vllm +spec: + accessModes: [ ReadWriteOnce ] + storageClassName: longhorn + resources: + requests: { storage: 200Gi } +--- +apiVersion: v1 +kind: Service +metadata: + name: vllm + namespace: vllm +spec: + type: ClusterIP + selector: { app.kubernetes.io/name: vllm } + ports: + - { name: http, port: 8000, targetPort: 8000 } diff --git a/full-ai-cluster/k8s/applications/weaviate/Application.yaml b/full-ai-cluster/k8s/applications/weaviate/Application.yaml new file mode 100644 index 0000000000..f7ac07186d --- /dev/null +++ b/full-ai-cluster/k8s/applications/weaviate/Application.yaml @@ -0,0 +1,39 @@ +# Weaviate — vector DB for RAG + embeddings. 
+ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: weaviate + namespace: argocd + finalizers: [ resources-finalizer.argocd.argoproj.io ] +spec: + project: default + source: + repoURL: https://weaviate.github.io/weaviate-helm + chart: weaviate + targetRevision: 17.6.0 + helm: + releaseName: weaviate + valuesObject: + replicas: 1 # bump to 3 for HA once cluster has the headroom + storage: + size: 100Gi + storageClassName: longhorn + modules: + # Use the cluster's own LLM serving for vectorization + + # generative endpoints. Swap to text-embeddings-inference + # if you want a separate embedding service. + text2vec-ollama: + enabled: true + apiEndpoint: http://ollama.ollama.svc.cluster.local:11434 + modelId: nomic-embed-text + generative-ollama: + enabled: true + apiEndpoint: http://ollama.ollama.svc.cluster.local:11434 + modelId: qwen2.5-coder:32b + destination: + server: https://kubernetes.default.svc + namespace: weaviate + syncPolicy: + automated: { prune: false, selfHeal: true } + syncOptions: [ CreateNamespace=true, ServerSideApply=true ] diff --git a/full-ai-cluster/k8s/bootstrap/argocd-install.yaml b/full-ai-cluster/k8s/bootstrap/argocd-install.yaml new file mode 100644 index 0000000000..968857634d --- /dev/null +++ b/full-ai-cluster/k8s/bootstrap/argocd-install.yaml @@ -0,0 +1,40 @@ +# full-ai-cluster/k8s/bootstrap/argocd-install.yaml +# +# K3S HelmChart CR (`helm.cattle.io/v1`) — K3S's Helm Controller +# resolves the chart at install time and applies the rendered +# manifests. This is the native K3S pattern for "install Helm +# chart at boot." Plain-resources format that +# `services.k3s.manifests` actually understands. 
+ +apiVersion: helm.cattle.io/v1 +kind: HelmChart +metadata: + name: argocd + namespace: kube-system +spec: + chart: argo-cd + repo: https://argoproj.github.io/argo-helm + version: 7.7.10 + targetNamespace: argocd + createNamespace: false # argocd-namespace.yaml creates it explicitly + valuesContent: |- + global: + domain: argocd.zeta.local + server: + service: + type: ClusterIP # ingress lands separately + configs: + params: + server.insecure: true # TLS terminated upstream once ingress lands + notifications: + enabled: false + dex: + enabled: false # SSO wires in via a Sealed Secret + values patch + redis-ha: + enabled: false # single-control-plane bootstrap + controller: + replicas: 1 + repoServer: + replicas: 1 + applicationSet: + enabled: true diff --git a/full-ai-cluster/k8s/bootstrap/argocd-namespace.yaml b/full-ai-cluster/k8s/bootstrap/argocd-namespace.yaml new file mode 100644 index 0000000000..7ba39d4b7c --- /dev/null +++ b/full-ai-cluster/k8s/bootstrap/argocd-namespace.yaml @@ -0,0 +1,12 @@ +# full-ai-cluster/k8s/bootstrap/argocd-namespace.yaml +# +# ArgoCD namespace. Applied by K3S on first boot — must apply +# before argocd-install.yaml. + +apiVersion: v1 +kind: Namespace +metadata: + name: argocd + labels: + app.kubernetes.io/name: argocd + app.kubernetes.io/part-of: argocd diff --git a/full-ai-cluster/k8s/bootstrap/cilium-install.yaml b/full-ai-cluster/k8s/bootstrap/cilium-install.yaml new file mode 100644 index 0000000000..c7d7100158 --- /dev/null +++ b/full-ai-cluster/k8s/bootstrap/cilium-install.yaml @@ -0,0 +1,42 @@ +# full-ai-cluster/k8s/bootstrap/cilium-install.yaml +# +# K3S HelmChart CR — K3S's Helm Controller installs Cilium at +# first boot. This is the only thing that gets a CNI running before +# ArgoCD's own pods need to schedule (chicken-and-egg avoidance). +# +# After cluster is up, the ArgoCD Cilium Application at +# k8s/applications/cilium/Application.yaml adopts the running +# install and reconciles ongoing changes. 
+ +apiVersion: helm.cattle.io/v1 +kind: HelmChart +metadata: + name: cilium + namespace: kube-system +spec: + chart: cilium + repo: https://helm.cilium.io + version: 1.16.5 + targetNamespace: kube-system + valuesContent: |- + kubeProxyReplacement: true + k8sServiceHost: control-plane.zeta.local + k8sServicePort: 6443 + ipam: + mode: cluster-pool + operator: + clusterPoolIPv4PodCIDRList: [ "10.42.0.0/16" ] + bpf: + masquerade: true + routingMode: native + ipv4NativeRoutingCIDR: "10.42.0.0/16" + autoDirectNodeRoutes: true + hubble: + enabled: true + relay: { enabled: true } + ui: { enabled: true } + metrics: + enabled: [ dns, drop, tcp, flow, icmp, http ] + enableOpenMetrics: true + operator: + replicas: 1 diff --git a/full-ai-cluster/k8s/bootstrap/cilium-namespace.yaml b/full-ai-cluster/k8s/bootstrap/cilium-namespace.yaml new file mode 100644 index 0000000000..d7957e2896 --- /dev/null +++ b/full-ai-cluster/k8s/bootstrap/cilium-namespace.yaml @@ -0,0 +1,12 @@ +# full-ai-cluster/k8s/bootstrap/cilium-namespace.yaml +# +# Cilium runs in kube-system by convention. This file just ensures +# the namespace exists (and carries the bootstrap label) before the +# Cilium HelmChart installs into it. Applied by K3S on first boot. + +apiVersion: v1 +kind: Namespace +metadata: + name: kube-system + labels: + app.kubernetes.io/managed-by: zeta-bootstrap diff --git a/full-ai-cluster/k8s/bootstrap/root-application.yaml b/full-ai-cluster/k8s/bootstrap/root-application.yaml new file mode 100644 index 0000000000..8d3d3763cc --- /dev/null +++ b/full-ai-cluster/k8s/bootstrap/root-application.yaml @@ -0,0 +1,48 @@ +# full-ai-cluster/k8s/bootstrap/root-application.yaml +# +# App-of-Apps root. K3S auto-applies after ArgoCD is running. +# ArgoCD then watches `k8s/applications/` for Application CRs and +# reconciles everything declared there. +# +# Adding a workload to the cluster: +# 1. mkdir full-ai-cluster/k8s/applications/$APP_NAME/ +# 2. Author Application.yaml + any supporting manifests +# 3.
git commit + push to main +# 4. ArgoCD picks it up on the next sync (~3 min) +# +# Either/or gating: some directories ship multiple alternatives +# (e.g. gitlab/ vs forgejo/, ollama/ vs vllm/). The root App-of-Apps +# picks them ALL up — gating happens at the per-Application +# `syncPolicy` level. Default-on apps keep an `automated:` block; +# alternatives omit it (manual sync only via `argocd app sync`). +# Each alternative's Application.yaml header names the swap procedure. + +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: zeta-root + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: https://github.com/Lucent-Financial-Group/Zeta + targetRevision: main + path: full-ai-cluster/k8s/applications + directory: + recurse: true + # Only pick up Application CRs (one per workload directory). + # Supporting manifests in the same dirs are reconciled by the + # specific app's Application, not by the root. + include: '{*/Application.yaml,Application.yaml}' + destination: + server: https://kubernetes.default.svc + namespace: argocd + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true + - ServerSideApply=true diff --git a/full-ai-cluster/nixos/hosts/control-plane/README.md b/full-ai-cluster/nixos/hosts/control-plane/README.md new file mode 100644 index 0000000000..5e50858db8 --- /dev/null +++ b/full-ai-cluster/nixos/hosts/control-plane/README.md @@ -0,0 +1,44 @@ +# control-plane + +K3S server + Cilium CNI bootstrap + ArgoCD reconciler. No GPU. 
+ +## What it runs + +- K3S server with embedded etcd, flannel + kube-proxy disabled + (Cilium takes over) +- Cilium CNI (installed at first boot by K3S's Helm controller from `k8s/bootstrap/`; ArgoCD adopts it afterwards) +- ArgoCD itself (auto-applied from `k8s/bootstrap/` via + `services.k3s.manifests` in `k3s-server.nix`) +- Local-path storage class for stateless workloads +- Docker (for any non-K8s container tooling) + +## What it does NOT run + +- No GPU workloads (those go on `worker-gpu` hosts) +- No big AI models locally (LLMs serve from worker-gpu via Ollama/vLLM) + +## Install + +See the parent [`../../../README.md`](../../../README.md) bootstrap flow. +This host's flake output name (the part after `#`) when installing is `control-plane`: + +```bash +nixos-install --flake /mnt/etc/zeta/full-ai-cluster#control-plane +``` + +## Post-install verification + +```bash +ssh zeta@control-plane.zeta.local +sudo kubectl get nodes +sudo kubectl -n kube-system get pods # cilium pods +sudo kubectl -n argocd get pods +sudo kubectl -n argocd get applications +sudo cilium status +sudo cilium hubble enable --ui +``` + +## Hardware config + +`hardware-configuration.nix` ships as a placeholder. Replace at +install time per the parent README. diff --git a/full-ai-cluster/nixos/hosts/control-plane/configuration.nix b/full-ai-cluster/nixos/hosts/control-plane/configuration.nix new file mode 100644 index 0000000000..1d6e3ec790 --- /dev/null +++ b/full-ai-cluster/nixos/hosts/control-plane/configuration.nix @@ -0,0 +1,32 @@ +# full-ai-cluster/nixos/hosts/control-plane/configuration.nix +# +# K3S server + ArgoCD bootstrap. Cilium CNI takes over from flannel. +# No GPU on this host — control-plane stays lean. + +{ config, pkgs, lib, ... }: + +{ + imports = [ + ./hardware-configuration.nix + ../../modules/common.nix + ../../modules/k3s-server.nix + ../../modules/docker.nix + ../../modules/local-storage.nix + ]; + + networking.hostName = "control-plane"; + + # Static IP recommended so worker nodes have a stable serverAddr.
+ # Per-site override here: + # networking.interfaces.eth0.ipv4.addresses = [{ + # address = "192.168.1.10"; + # prefixLength = 24; + # }]; + # networking.defaultGateway = "192.168.1.1"; + # networking.nameservers = [ "1.1.1.1" "9.9.9.9" ]; + + # Add maintainer SSH keys for the `zeta` admin user: + users.users.zeta.openssh.authorizedKeys.keys = [ + # "ssh-ed25519 AAAAC3Nz... aaron@zeta" + ]; +} diff --git a/full-ai-cluster/nixos/hosts/control-plane/hardware-configuration.nix b/full-ai-cluster/nixos/hosts/control-plane/hardware-configuration.nix new file mode 100644 index 0000000000..1e4222d2da --- /dev/null +++ b/full-ai-cluster/nixos/hosts/control-plane/hardware-configuration.nix @@ -0,0 +1,37 @@ +# full-ai-cluster/nixos/hosts/control-plane/hardware-configuration.nix +# +# PLACEHOLDER — replace during real install via: +# nixos-generate-config --root /mnt +# cp /mnt/etc/nixos/hardware-configuration.nix \ +# /mnt/etc/zeta/full-ai-cluster/nixos/hosts/control-plane/hardware-configuration.nix +# +# This stub exists so `nix flake check` succeeds in CI before the +# host is provisioned. Real generator output replaces all values. + +{ config, lib, modulesPath, ... 
}: + +{ + imports = [ + (modulesPath + "/installer/scan/not-detected.nix") + ]; + + boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usb_storage" "sd_mod" ]; + boot.initrd.kernelModules = [ ]; + boot.kernelModules = [ "kvm-intel" "kvm-amd" ]; + boot.extraModulePackages = [ ]; + + fileSystems."/" = lib.mkDefault { + device = "/dev/disk/by-label/nixos"; + fsType = "ext4"; + }; + + fileSystems."/boot" = lib.mkDefault { + device = "/dev/disk/by-label/boot"; + fsType = "vfat"; + }; + + swapDevices = lib.mkDefault [ ]; + + networking.useDHCP = lib.mkDefault true; + nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux"; +} diff --git a/full-ai-cluster/nixos/hosts/worker-gpu/README.md b/full-ai-cluster/nixos/hosts/worker-gpu/README.md new file mode 100644 index 0000000000..21d95350e4 --- /dev/null +++ b/full-ai-cluster/nixos/hosts/worker-gpu/README.md @@ -0,0 +1,60 @@ +# worker-gpu + +K3S worker template + NVIDIA GPU + container toolkit + K8s device +plugin + VFIO passthrough (optional). + +## What it runs + +- K3S agent joining `https://control-plane.zeta.local:6443` +- NVIDIA proprietary driver + `nvidia-container-toolkit` (so K3S + pods can request `nvidia.com/gpu` resources) +- NVIDIA Kubernetes device plugin DaemonSet (advertises GPUs) +- Docker (for non-K8s container workloads on this host) +- Local-path storage class + +## Optional: GPU passthrough for VM workloads + +If this host hosts VMs that need a dedicated GPU (e.g. 
running +a Windows VM with passthrough alongside K8s workloads on the +remaining GPUs), enable VFIO in `configuration.nix`: + +```nix +zeta.gpu-passthrough = { + enable = true; + pciIds = [ "10de:2204" "10de:1aef" ]; # find via `lspci -nn` +}; +``` + +## Mixed-vendor hosts + +If this worker has AMD or Intel GPUs alongside NVIDIA, edit the +`zeta.gpu-device-plugin.vendors` list: + +```nix +zeta.gpu-device-plugin = { + enable = true; + vendors = [ "nvidia" "amd" "intel" ]; +}; +``` + +Each enabled vendor gets its own DaemonSet advertising the +appropriate resource name to K8s. + +## Per-physical-worker scaling + +This directory is a **template**. For each additional GPU worker: + +1. Copy `worker-gpu/` to `worker-gpu-NN/` +2. Update `networking.hostName` in the new copy +3. Drop in the per-host `hardware-configuration.nix` generated by + `nixos-generate-config` +4. Add `nixosConfigurations.worker-gpu-NN = ...` entry to + `../../../flake.nix` +5. Install: `nixos-install --flake /mnt/etc/zeta/full-ai-cluster#worker-gpu-NN` + +## Install + +See parent [`../../../README.md`](../../../README.md) bootstrap flow. +Important: write the K3S cluster token (from the control-plane) +to `/var/lib/rancher/k3s/agent/token` on the target disk (i.e. under `/mnt` while installing) BEFORE running +`nixos-install`. K3S refuses to start without it. diff --git a/full-ai-cluster/nixos/hosts/worker-gpu/configuration.nix b/full-ai-cluster/nixos/hosts/worker-gpu/configuration.nix new file mode 100644 index 0000000000..3e0973c6a1 --- /dev/null +++ b/full-ai-cluster/nixos/hosts/worker-gpu/configuration.nix @@ -0,0 +1,53 @@ +# full-ai-cluster/nixos/hosts/worker-gpu/configuration.nix +# +# Worker template. Per physical worker, duplicate this file under +# nixos/hosts/worker-gpu-NN/, add a per-host hardware-configuration, +# and add a nixosConfigurations.worker-gpu-NN entry to flake.nix. +# +# This template runs: NVIDIA GPU + container-toolkit + K8s device +# plugin + Docker + local storage. VFIO passthrough OFF by default +# (enable per-host). + +{ config, pkgs, lib, ...
}: + +{ + imports = [ + ./hardware-configuration.nix + ../../modules/common.nix + ../../modules/k3s-agent.nix + ../../modules/gpu.nix + ../../modules/gpu-device-plugin.nix + ../../modules/gpu-passthrough.nix + ../../modules/docker.nix + ../../modules/local-storage.nix + ]; + + networking.hostName = "worker-gpu"; + + # Cluster join target. Override per-site. + services.k3s.serverAddr = "https://control-plane.zeta.local:6443"; + + # Vendor mix for the K8s device plugin. Override per-host if + # this worker has AMD or Intel GPUs alongside (or instead of) NVIDIA. + zeta.gpu-device-plugin = { + enable = true; + vendors = [ "nvidia" ]; + }; + + # VFIO passthrough disabled by default. Enable + list PCI IDs + # per-host when you want a GPU bound to vfio-pci for VM workloads. + zeta.gpu-passthrough = { + enable = false; + pciIds = [ ]; # e.g. [ "10de:2204" "10de:1aef" ] + }; + + # Per-host node labels — let the scheduler target hardware specs. + services.k3s.extraFlags = lib.mkAfter [ + # "--node-label=zeta.io/gpu-model=rtx-4090" + # "--node-label=zeta.io/gpu-count=2" + ]; + + users.users.zeta.openssh.authorizedKeys.keys = [ + # "ssh-ed25519 AAAAC3Nz... aaron@zeta" + ]; +} diff --git a/full-ai-cluster/nixos/hosts/worker-gpu/hardware-configuration.nix b/full-ai-cluster/nixos/hosts/worker-gpu/hardware-configuration.nix new file mode 100644 index 0000000000..d3287a4d11 --- /dev/null +++ b/full-ai-cluster/nixos/hosts/worker-gpu/hardware-configuration.nix @@ -0,0 +1,34 @@ +# full-ai-cluster/nixos/hosts/worker-gpu/hardware-configuration.nix +# +# PLACEHOLDER — replace at install time via: +# nixos-generate-config --root /mnt +# cp /mnt/etc/nixos/hardware-configuration.nix \ +# /mnt/etc/zeta/full-ai-cluster/nixos/hosts/worker-gpu/hardware-configuration.nix + +{ config, lib, modulesPath, ... 
}: + +{ + imports = [ + (modulesPath + "/installer/scan/not-detected.nix") + ]; + + boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usb_storage" "sd_mod" ]; + boot.initrd.kernelModules = [ ]; + boot.kernelModules = [ "kvm-intel" "kvm-amd" ]; + boot.extraModulePackages = [ ]; + + fileSystems."/" = lib.mkDefault { + device = "/dev/disk/by-label/nixos"; + fsType = "ext4"; + }; + + fileSystems."/boot" = lib.mkDefault { + device = "/dev/disk/by-label/boot"; + fsType = "vfat"; + }; + + swapDevices = lib.mkDefault [ ]; + + networking.useDHCP = lib.mkDefault true; + nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux"; +} diff --git a/full-ai-cluster/nixos/modules/common.nix b/full-ai-cluster/nixos/modules/common.nix new file mode 100644 index 0000000000..51fd0270a6 --- /dev/null +++ b/full-ai-cluster/nixos/modules/common.nix @@ -0,0 +1,66 @@ +# full-ai-cluster/nixos/modules/common.nix +# +# Shared baseline every cluster host imports. + +{ config, pkgs, lib, stateVersion ? "24.11", ... 
}: + +{ + nix.settings = { + experimental-features = [ "nix-command" "flakes" ]; + auto-optimise-store = true; + trusted-users = [ "root" "@wheel" ]; + substituters = [ + "https://cache.nixos.org" + "https://nix-community.cachix.org" + ]; + trusted-public-keys = [ + "cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=" + "nix-community.cachix.org-1:mB9FSh9qf2dCimDSUo8Zy7bkq5CX+/rkCWyvRCYg3Fs=" + ]; + }; + + nix.gc = { + automatic = true; + dates = "weekly"; + options = "--delete-older-than 30d"; + }; + + time.timeZone = lib.mkDefault "America/New_York"; + i18n.defaultLocale = "en_US.UTF-8"; + + networking.networkmanager.enable = true; + networking.firewall.enable = true; + + services.openssh = { + enable = true; + settings = { + PermitRootLogin = lib.mkDefault "prohibit-password"; + PasswordAuthentication = lib.mkDefault false; + KbdInteractiveAuthentication = lib.mkDefault false; + }; + }; + + users.users.zeta = { + isNormalUser = true; + extraGroups = [ "wheel" "networkmanager" ]; + }; + security.sudo.wheelNeedsPassword = lib.mkDefault true; + + environment.systemPackages = with pkgs; [ + git vim htop btop tmux ripgrep jq yq-go curl wget rsync tree + file unzip iproute2 iputils dnsutils nmap tcpdump mtr + pciutils usbutils lshw nvme-cli smartmontools lm_sensors + skopeo + kubectl kubernetes-helm k9s argocd + cilium-cli hubble + ]; + + boot.loader = { + systemd-boot.enable = lib.mkDefault true; + efi.canTouchEfiVariables = lib.mkDefault true; + }; + + powerManagement.cpuFreqGovernor = lib.mkDefault "performance"; + + system.stateVersion = lib.mkDefault stateVersion; +} diff --git a/full-ai-cluster/nixos/modules/docker.nix b/full-ai-cluster/nixos/modules/docker.nix new file mode 100644 index 0000000000..d9a650a4b2 --- /dev/null +++ b/full-ai-cluster/nixos/modules/docker.nix @@ -0,0 +1,44 @@ +# full-ai-cluster/nixos/modules/docker.nix +# +# Docker daemon for non-K8s container workloads (local builds, +# devcontainers, the Hermes image build with 
SOPS-baked secrets, +# any tooling that needs a real Docker socket). +# +# K3S uses containerd under the hood — this is separate. + +{ config, pkgs, lib, ... }: + +{ + virtualisation.docker = { + enable = true; + + # rootless-by-default avoids accidental privileged-container + # surprises. Maintainers can still use `sudo docker` for cases + # that need the system daemon. + rootless = { + enable = true; + setSocketVariable = true; + }; + + # Enable on-host BuildKit so the SOPS-baking Hermes image build + # uses build-secrets and cache-mounts. + daemon.settings = { + features = { buildkit = true; }; + "experimental" = false; + }; + }; + + # Tooling: docker CLI, compose, buildx. + environment.systemPackages = with pkgs; [ + docker + docker-compose + docker-buildx + ]; + + # Intentionally NOT adding `zeta` to the `docker` group. + # Membership in `docker` is effectively root-on-host because the + # docker socket can mount any path. With rootless Docker enabled + # above, the `zeta` user gets its OWN rootless daemon socket at + # $XDG_RUNTIME_DIR/docker.sock — that's the only docker they need. + # For maintainer tasks requiring the system daemon, use `sudo docker`. +} diff --git a/full-ai-cluster/nixos/modules/gpu-device-plugin.nix b/full-ai-cluster/nixos/modules/gpu-device-plugin.nix new file mode 100644 index 0000000000..6a02435681 --- /dev/null +++ b/full-ai-cluster/nixos/modules/gpu-device-plugin.nix @@ -0,0 +1,174 @@ +# full-ai-cluster/nixos/modules/gpu-device-plugin.nix +# +# Exposes GPUs to Kubernetes pods via the appropriate device plugin +# DaemonSet. The plugins themselves run as K8s DaemonSets (deployed +# by NixOS into the K3S manifests directory so they come up at first +# boot, before ArgoCD takes over). 
+# +# Each vendor's plugin advertises a different K8s resource name: +# NVIDIA → `nvidia.com/gpu` +# AMD → `amd.com/gpu` +# Intel → `gpu.intel.com/i915` (i915 driver) or `gpu.intel.com/xe` (Xe driver) +# +# Per-host config sets `zeta.gpu-device-plugin.vendors = [ "nvidia" "amd" ];` +# Plugins only get installed for the vendors enabled. + +{ config, pkgs, lib, ... }: + +let + cfg = config.zeta.gpu-device-plugin; +in +{ + options.zeta.gpu-device-plugin = { + enable = lib.mkEnableOption "K8s GPU device plugins"; + + vendors = lib.mkOption { + type = lib.types.listOf (lib.types.enum [ "nvidia" "amd" "intel" ]); + default = [ ]; + description = '' + Which vendor device plugins to install on this host. + Multiple can coexist on the same node if it has mixed GPUs. + ''; + example = [ "nvidia" "amd" ]; + }; + + nvidiaVersion = lib.mkOption { + type = lib.types.str; + default = "v0.17.4"; + description = "NVIDIA k8s-device-plugin container image tag."; + }; + + amdVersion = lib.mkOption { + type = lib.types.str; + default = "v1.31.0"; + description = "AMD k8s-device-plugin container image tag."; + }; + + intelVersion = lib.mkOption { + type = lib.types.str; + default = "v0.32.1"; + description = "Intel intel-gpu-plugin container image tag."; + }; + }; + + config = lib.mkIf cfg.enable { + # Drop K3S manifests for each enabled vendor. K3S applies them on + # first boot so GPU resources are advertised to the scheduler + # before ArgoCD comes up. These manifests are static (no upgrade + # via ArgoCD today) — bumping a device-plugin version means editing + # the `*Version` options above and re-running nixos-rebuild on the + # host. A future `k8s/applications/gpu-device-plugin/` Application + # could take over reconciliation, but it doesn't exist yet. + # NOTE(review): K3S auto-deploys manifests on server-role nodes only, but this module is enabled on the agent-role worker-gpu host — confirm the DaemonSets actually land.
+ services.k3s.manifests = lib.mkMerge [ + (lib.mkIf (lib.elem "nvidia" cfg.vendors) { + nvidia-device-plugin.source = pkgs.writeText "nvidia-device-plugin.yaml" '' + apiVersion: apps/v1 + kind: DaemonSet + metadata: + name: nvidia-device-plugin-daemonset + namespace: kube-system + spec: + selector: + matchLabels: + name: nvidia-device-plugin-ds + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + name: nvidia-device-plugin-ds + spec: + tolerations: + - { key: CriticalAddonsOnly, operator: Exists } + - { key: nvidia.com/gpu, operator: Exists, effect: NoSchedule } + nodeSelector: + zeta.io/gpu: nvidia + priorityClassName: system-node-critical + containers: + - image: nvcr.io/nvidia/k8s-device-plugin:${cfg.nvidiaVersion} + name: nvidia-device-plugin-ctr + env: + - { name: FAIL_ON_INIT_ERROR, value: "false" } + securityContext: + allowPrivilegeEscalation: false + capabilities: { drop: ["ALL"] } + volumeMounts: + - { name: device-plugin, mountPath: /var/lib/kubelet/device-plugins } + volumes: + - { name: device-plugin, hostPath: { path: /var/lib/kubelet/device-plugins } } + ''; + }) + + (lib.mkIf (lib.elem "amd" cfg.vendors) { + amd-device-plugin.source = pkgs.writeText "amd-device-plugin.yaml" '' + apiVersion: apps/v1 + kind: DaemonSet + metadata: + name: amdgpu-device-plugin-daemonset + namespace: kube-system + spec: + selector: + matchLabels: + name: amdgpu-dp-ds + template: + metadata: + labels: + name: amdgpu-dp-ds + spec: + nodeSelector: + zeta.io/gpu: amd + priorityClassName: system-node-critical + containers: + - image: rocm/k8s-device-plugin:${cfg.amdVersion} + name: amdgpu-dp-cntr + securityContext: + allowPrivilegeEscalation: false + capabilities: { drop: ["ALL"] } + volumeMounts: + - { name: dp, mountPath: /var/lib/kubelet/device-plugins } + - { name: sys, mountPath: /sys } + volumes: + - { name: dp, hostPath: { path: /var/lib/kubelet/device-plugins } } + - { name: sys, hostPath: { path: /sys } } + ''; + }) + + (lib.mkIf (lib.elem 
"intel" cfg.vendors) { + intel-device-plugin.source = pkgs.writeText "intel-device-plugin.yaml" '' + apiVersion: apps/v1 + kind: DaemonSet + metadata: + name: intel-gpu-plugin + namespace: kube-system + spec: + selector: + matchLabels: + app: intel-gpu-plugin + template: + metadata: + labels: + app: intel-gpu-plugin + spec: + nodeSelector: + zeta.io/gpu: intel + priorityClassName: system-node-critical + containers: + - name: intel-gpu-plugin + image: intel/intel-gpu-plugin:${cfg.intelVersion} + securityContext: + allowPrivilegeEscalation: false + capabilities: { drop: ["ALL"] } + volumeMounts: + - { name: devfs, mountPath: /dev/dri, readOnly: true } + - { name: sysfs, mountPath: /sys/class/drm, readOnly: true } + - { name: kubeletsockets, mountPath: /var/lib/kubelet/device-plugins } + volumes: + - { name: devfs, hostPath: { path: /dev/dri } } + - { name: sysfs, hostPath: { path: /sys/class/drm } } + - { name: kubeletsockets, hostPath: { path: /var/lib/kubelet/device-plugins } } + ''; + }) + ]; + }; +} diff --git a/full-ai-cluster/nixos/modules/gpu-passthrough.nix b/full-ai-cluster/nixos/modules/gpu-passthrough.nix new file mode 100644 index 0000000000..62d1960ec4 --- /dev/null +++ b/full-ai-cluster/nixos/modules/gpu-passthrough.nix @@ -0,0 +1,75 @@ +# full-ai-cluster/nixos/modules/gpu-passthrough.nix +# +# VFIO GPU passthrough setup. Lets a host bind one or more GPUs to +# vfio-pci at boot so they can be assigned to virtual machines +# (libvirt/QEMU/Cloud-Hypervisor) running on the same host alongside +# K3S workloads. +# +# Per-host override required: set the PCI vendor:device IDs for the +# GPUs you want VFIO-bound. Find them with `lspci -nn | grep VGA`. +# Example: 10de:2204 (NVIDIA RTX 3090) + +{ config, pkgs, lib, ... 
}: + +let + cfg = config.zeta.gpu-passthrough; +in +{ + options.zeta.gpu-passthrough = { + enable = lib.mkEnableOption "VFIO GPU passthrough"; + + pciIds = lib.mkOption { + type = lib.types.listOf lib.types.str; + default = [ ]; + description = '' + PCI vendor:device IDs to bind to vfio-pci at boot. Find via + `lspci -nn | grep VGA`. Example: [ "10de:2204" "10de:1aef" ] + for a 3090 + its audio function. + ''; + example = [ "10de:2204" "10de:1aef" ]; + }; + }; + + config = lib.mkIf cfg.enable { + # IOMMU on. AMD: amd_iommu=on. Intel: intel_iommu=on. + boot.kernelParams = [ + "intel_iommu=on" # safe on AMD too — ignored if no Intel iommu + "amd_iommu=on" + "iommu=pt" + ] ++ lib.optional (cfg.pciIds != [ ]) "vfio-pci.ids=${lib.concatStringsSep "," cfg.pciIds}"; + + boot.kernelModules = [ + "vfio_pci" + "vfio" + "vfio_iommu_type1" + ]; + + # Bind early so the NVIDIA driver doesn't grab the device first. + boot.initrd.kernelModules = [ + "vfio_pci" + "vfio" + "vfio_iommu_type1" + ]; + + # Libvirt + QEMU stack for hosting passthrough VMs. + virtualisation.libvirtd = { + enable = true; + qemu = { + package = pkgs.qemu_kvm; + runAsRoot = true; + ovmf = { + enable = true; + packages = [ pkgs.OVMFFull.fd ]; + }; + }; + }; + users.users.zeta.extraGroups = [ "libvirtd" "kvm" ]; + + environment.systemPackages = with pkgs; [ + virt-manager + virt-viewer + OVMFFull + qemu_kvm + ]; + }; +} diff --git a/full-ai-cluster/nixos/modules/gpu.nix b/full-ai-cluster/nixos/modules/gpu.nix new file mode 100644 index 0000000000..9facff91a6 --- /dev/null +++ b/full-ai-cluster/nixos/modules/gpu.nix @@ -0,0 +1,55 @@ +# full-ai-cluster/nixos/modules/gpu.nix +# +# NVIDIA driver + container toolkit for AI worker nodes. +# AMD ROCm + Intel oneAPI live in sibling modules (TODO when first +# AMD/Intel cards land). + +{ config, pkgs, lib, ... 
}: + +{ + nixpkgs.config.allowUnfreePredicate = pkg: + let name = lib.getName pkg; in + builtins.elem name [ + "nvidia-x11" + "nvidia-settings" + "nvidia-persistenced" + "nvidia-docker" + "nvidia-container-toolkit" + ] + || lib.hasPrefix "cuda" name + || lib.hasPrefix "libcu" name + || lib.hasPrefix "libnv" name + || lib.hasPrefix "libnp" name + || name == "cuda-merged"; + + services.xserver.videoDrivers = [ "nvidia" ]; + + hardware.nvidia = { + package = config.boot.kernelPackages.nvidiaPackages.production; + modesetting.enable = true; + nvidiaPersistenced = true; + powerManagement.enable = false; + powerManagement.finegrained = false; + open = lib.mkDefault false; + }; + + hardware.graphics = { + enable = true; + enable32Bit = true; + }; + + hardware.nvidia-container-toolkit.enable = true; + + environment.systemPackages = with pkgs; [ + nvtopPackages.nvidia + cudaPackages.cuda_cudart + cudaPackages.cuda_nvcc + glxinfo + vulkan-tools + clinfo + ]; + + services.k3s.extraFlags = lib.mkAfter [ + "--node-label=zeta.io/gpu=nvidia" + ]; +} diff --git a/full-ai-cluster/nixos/modules/k3s-agent.nix b/full-ai-cluster/nixos/modules/k3s-agent.nix new file mode 100644 index 0000000000..b9db08277e --- /dev/null +++ b/full-ai-cluster/nixos/modules/k3s-agent.nix @@ -0,0 +1,43 @@ +# full-ai-cluster/nixos/modules/k3s-agent.nix +# +# K3S worker. Matches the server's CNI takeover (no flannel, +# no kube-proxy, Cilium owns the network). + +{ config, pkgs, lib, ... }: + +{ + services.k3s = { + enable = true; + role = "agent"; + serverAddr = lib.mkDefault "https://control-plane.zeta.local:6443"; + tokenFile = lib.mkDefault "/var/lib/rancher/k3s/agent/token"; + + extraFlags = [ + "--node-label=zeta.io/role=worker" + + # NOTE: server-only flags like `--flannel-backend=none`, + # `--disable-kube-proxy`, and `--disable-network-policy` + # are NOT set here — they're server-side and the agent + # inherits the network configuration from the server. 
K3S + # rejects them on agents with a `flag not supported` error. + # Cilium owns CNI on both sides; the server-side flags are + # what disables flannel cluster-wide. + ]; + }; + + networking.firewall = { + allowedTCPPorts = [ + 10250 # kubelet + 4244 # Hubble server + 4240 # Cilium cluster health checks (VXLAN 8472 is UDP-only, so it is not opened for TCP) + ]; + allowedUDPPorts = [ + 8472 # VXLAN overlay (only used if Cilium is switched from native routing to tunnel mode) + ]; + trustedInterfaces = [ "cilium_host" "cilium_net" "cni0" "lxc+" ]; + }; + + systemd.tmpfiles.rules = [ + "d /var/lib/rancher/k3s 0755 root root - -" + ]; +} diff --git a/full-ai-cluster/nixos/modules/k3s-server.nix b/full-ai-cluster/nixos/modules/k3s-server.nix new file mode 100644 index 0000000000..a418b02156 --- /dev/null +++ b/full-ai-cluster/nixos/modules/k3s-server.nix @@ -0,0 +1,89 @@ +# full-ai-cluster/nixos/modules/k3s-server.nix +# +# K3S control-plane configured for Cilium CNI takeover. +# +# K3S ships with flannel (CNI), kube-proxy, network-policy, servicelb, +# and traefik. Cilium replaces flannel + kube-proxy + network-policy. +# We disable all five so Cilium owns the network layer end-to-end. + +{ config, pkgs, lib, ... }: + +{ + services.k3s = { + enable = true; + role = "server"; + tokenFile = lib.mkDefault "/var/lib/rancher/k3s/server/token"; + clusterInit = lib.mkDefault true; + + extraFlags = [ + "--write-kubeconfig-mode=0640" + "--write-kubeconfig-group=wheel" + + # CNI takeover by Cilium — disable flannel + kube-proxy + the + # built-in network-policy controller. Cilium handles all three. + "--flannel-backend=none" + "--disable-network-policy" + "--disable-kube-proxy" + + # Disable bundled servicelb + traefik. No replacement L4 + # load-balancer or ingress is declared in this PR — Services + # of type LoadBalancer will stay Pending until a maintainer + # commits a MetalLB + ingress-nginx Application under + # k8s/applications/. Bootstrap-period workloads needing + # external traffic should use NodePort or `kubectl port-forward`.
+ "--disable=servicelb" + "--disable=traefik" + + # Cluster CIDR — give Cilium a /16 to work with. + "--cluster-cidr=10.42.0.0/16" + "--service-cidr=10.43.0.0/16" + ]; + + # K3S applies these manifests on first boot. We seed only what's + # required to get Cilium + ArgoCD running. ArgoCD takes over and + # reconciles every other workload from k8s/applications/. + manifests = { + # CNI MUST come first — without it no pods can schedule, + # including ArgoCD's own pods. Cilium installs here; ArgoCD's + # cilium Application (k8s/applications/cilium/) takes over + # reconciliation once it's healthy. + cilium-namespace.source = ../../k8s/bootstrap/cilium-namespace.yaml; + cilium-install.source = ../../k8s/bootstrap/cilium-install.yaml; + # Then ArgoCD itself. + argocd-namespace.source = ../../k8s/bootstrap/argocd-namespace.yaml; + argocd-install.source = ../../k8s/bootstrap/argocd-install.yaml; + # Finally the App-of-Apps that hands off to ArgoCD. + root-application.source = ../../k8s/bootstrap/root-application.yaml; + }; + }; + + networking.firewall = { + allowedTCPPorts = [ + 6443 # K3S API + 9345 # K3S supervisor/join + 10250 # kubelet + 4244 # Hubble server + 4245 # Hubble Relay + 4240 # Cilium cluster health checks (VXLAN 8472 is UDP-only, so it is not opened for TCP) + # etcd ports 2379/2380 intentionally NOT in this list. + # K3S embedded etcd binds 127.0.0.1 by default. Opening + # those ports at the host firewall would risk exposing etcd + # to the LAN if the bind address ever drifts. For multi- + # server HA, add 2379/2380 to a host-specific override that + # ALSO scopes them with `interfacesIn`/source-IP filtering to + # the other control-plane nodes only.
+ ]; + allowedUDPPorts = [ + 8472 # VXLAN (Cilium can also run native-routing) + ]; + trustedInterfaces = [ "cilium_host" "cilium_net" "cni0" "lxc+" ]; + }; + + environment.variables = { + KUBECONFIG = "/etc/rancher/k3s/k3s.yaml"; + }; + + systemd.tmpfiles.rules = [ + "d /var/lib/rancher/k3s 0755 root root - -" + ]; +} diff --git a/full-ai-cluster/nixos/modules/local-storage.nix b/full-ai-cluster/nixos/modules/local-storage.nix new file mode 100644 index 0000000000..6f77dbaacd --- /dev/null +++ b/full-ai-cluster/nixos/modules/local-storage.nix @@ -0,0 +1,139 @@ +# full-ai-cluster/nixos/modules/local-storage.nix +# +# Local-path storage class for K8s. Provisions hostPath PVs out of +# /var/lib/zeta-local-storage/ on whichever node a pod lands on. +# +# Good for stateless workloads, scratch, cache. NOT for anything +# that needs to survive node failure — Longhorn (via ArgoCD) handles +# distributed storage for stateful workloads. +# +# Installed as a K3S auto-applied manifest so it's available before +# ArgoCD comes up. + +{ config, pkgs, lib, ... }: + +{ + # Ensure the host directory exists with correct ownership. + systemd.tmpfiles.rules = [ + "d /var/lib/zeta-local-storage 0755 root root - -" + ]; + + # Install rancher/local-path-provisioner via K3S manifest. K3S + # actually ships this by default but we re-declare it explicitly + # so the path + storage class name match across the cluster. 
+ services.k3s.manifests = { + local-path-provisioner.source = pkgs.writeText "local-path-provisioner.yaml" '' + apiVersion: v1 + kind: Namespace + metadata: + name: local-path-storage + --- + apiVersion: storage.k8s.io/v1 + kind: StorageClass + metadata: + name: zeta-local-path + annotations: + storageclass.kubernetes.io/is-default-class: "true" + provisioner: rancher.io/local-path + reclaimPolicy: Delete + volumeBindingMode: WaitForFirstConsumer + --- + apiVersion: v1 + kind: ConfigMap + metadata: + name: local-path-config + namespace: local-path-storage + data: + config.json: | + { + "nodePathMap": [ + { + "node": "DEFAULT_PATH_FOR_NON_LISTED_NODES", + "paths": ["/var/lib/zeta-local-storage"] + } + ] + } + setup: |- + #!/bin/sh + set -eu + path="$VOL_DIR" + [ -n "$path" ] || { echo "VOL_DIR empty; refusing to mkdir"; exit 1; } + case "$path" in /var/lib/zeta-local-storage/*) ;; *) echo "VOL_DIR outside allowed root: $path"; exit 1 ;; esac + mkdir -m 0777 -p "$path" + teardown: |- + #!/bin/sh + set -eu + path="$VOL_DIR" + [ -n "$path" ] || { echo "VOL_DIR empty; refusing to rm"; exit 1; } + case "$path" in /var/lib/zeta-local-storage/*) ;; *) echo "VOL_DIR outside allowed root: $path"; exit 1 ;; esac + rm -rf "$path" + --- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: local-path-provisioner + namespace: local-path-storage + spec: + replicas: 1 + selector: + matchLabels: { app: local-path-provisioner } + template: + metadata: + labels: { app: local-path-provisioner } + spec: + serviceAccountName: local-path-provisioner-service-account + containers: + - name: local-path-provisioner + image: rancher/local-path-provisioner:v0.0.30 + imagePullPolicy: IfNotPresent + command: + - local-path-provisioner + - start + - --config + - /etc/config/config.json + volumeMounts: + - { name: config-volume, mountPath: /etc/config/ } + env: + - { name: POD_NAMESPACE, valueFrom: { fieldRef: { fieldPath: metadata.namespace } } } + volumes: + - { name: config-volume, 
configMap: { name: local-path-config } } + --- + apiVersion: v1 + kind: ServiceAccount + metadata: + name: local-path-provisioner-service-account + namespace: local-path-storage + --- + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRole + metadata: + name: local-path-provisioner-role + rules: + - apiGroups: [""] + resources: ["nodes", "persistentvolumeclaims", "configmaps", "pods", "pods/log"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["get", "list", "watch", "create", "patch", "update", "delete"] + - apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses"] + verbs: ["get", "list", "watch"] + --- + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRoleBinding + metadata: + name: local-path-provisioner-bind + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: local-path-provisioner-role + subjects: + - kind: ServiceAccount + name: local-path-provisioner-service-account + namespace: local-path-storage + ''; + }; +} diff --git a/full-ai-cluster/usb-nixos-installer/README.md b/full-ai-cluster/usb-nixos-installer/README.md new file mode 100644 index 0000000000..86d444c5c7 --- /dev/null +++ b/full-ai-cluster/usb-nixos-installer/README.md @@ -0,0 +1,95 @@ +# usb-nixos-installer + +**Scope: ONLY the USB bootstrap portion.** + +This directory contains exactly the four things needed to produce a +bootable NixOS USB installer that can install the target operating +system on a new machine over USB or Ethernet: + +1. **NixOS declarative configuration** — `nixos/installer/configuration.nix` +2. **NixFlakes for packages** — `flake.nix` at the directory root +3. **Git for version text storage** — every file here lives in git; + `flake.nix` references inputs by Git branch. **Run + `nix flake update` and commit the resulting `flake.lock`** to + pin to specific revisions for fully-reproducible builds. 
The
+   lock file isn't committed yet (no maintainer with Nix has run
+   `nix flake update` on this branch yet); first maintainer to
+   build the ISO should commit it.
+4. **The OS Flake on a USB stick** — `nix build .#installer-iso`
+   produces a bootable ISO image you `dd` to a USB stick. The same
+   ISO supports Ethernet install (boot the target on the stick,
+   then `nixos-install --flake <flake-path>#<hostname>` over the network).
+
+**This directory is intentionally minimal.** It does NOT contain
+K3S, ArgoCD, Orleans, GitLab, observability, GPU runtime, or any
+cluster workload. Those live in the `full-ai-cluster/` directory
+at the repo root.
+
+For the full end-to-end AI cluster (including this USB bootstrap
+as its starting snippet), see
+<https://github.com/Lucent-Financial-Group/Zeta/tree/main/full-ai-cluster>.
+
+## Build the USB stick
+
+From any machine with Nix installed:
+
+```bash
+cd usb-nixos-installer
+nix build .#installer-iso
+# Output: result/iso/zeta-installer-*.iso (~1.5-2 GB)
+```
+
+## Write the ISO to a USB stick
+
+### macOS
+
+```bash
+diskutil list                     # find the USB device (e.g. /dev/disk4)
+diskutil unmountDisk /dev/disk4   # replace 4 with your USB device number
+sudo dd if=result/iso/zeta-installer-*.iso of=/dev/rdisk4 bs=4m status=progress
+diskutil eject /dev/disk4
+```
+
+### Linux
+
+```bash
+lsblk                             # find the USB device (e.g. /dev/sdb)
+sudo dd if=result/iso/zeta-installer-*.iso of=/dev/sdb bs=4M status=progress conv=fsync
+sync
+```
+
+## Install on a target machine
+
+1. Boot the target on the USB stick.
+2. Log in at the console as `root` (no password — upstream NixOS
+   installer default; console-only).
+3. Bring up the network with `nmtui` (interactive) or
+   `nmcli device wifi connect <SSID> password <password>`.
+4. Identify the target disk with `lsblk`.
+5. Partition + mount as desired (parted/gptfdisk/cryptsetup/zfs
+   are all on the stick).
+6. Generate per-machine hardware config:
+   `nixos-generate-config --root /mnt`
+7. Install:
+   `nixos-install --flake <flake-path>#<hostname>` where `<hostname>` is one
+   of the names declared in `flake.nix` `nixosConfigurations`.
+   (This minimal installer only declares `installer` itself —
+   target-machine hosts live in `../full-ai-cluster/flake.nix`.)
+8. Reboot.
+
+## What's on the stick
+
+The complete package list lives in
+[`nixos/installer/configuration.nix`](nixos/installer/configuration.nix)
+under `environment.systemPackages`. Categories include:
+
+- Version control: git, git-lfs, gnupg, openssh
+- Editors: vim, neovim, nano
+- Shell QoL: tmux, htop, ripgrep, jq, yq-go, fzf, bat
+- Network: curl, wget, nmap, networkmanager, iwd, wireguard-tools
+- Disk: parted, gptfdisk, cryptsetup, zfs, lvm2, mdadm
+- Hardware inspection: lshw, dmidecode, nvme-cli, lm_sensors
+- NixOS install tooling: nixos-install-tools, nix-output-monitor
+
+The flake itself is the tick source. Every subsequent install
+reconciles toward the desired state declared here.
diff --git a/full-ai-cluster/usb-nixos-installer/flake.nix b/full-ai-cluster/usb-nixos-installer/flake.nix
new file mode 100644
index 0000000000..715791f2a9
--- /dev/null
+++ b/full-ai-cluster/usb-nixos-installer/flake.nix
@@ -0,0 +1,46 @@
+# usb-nixos-installer/flake.nix
+#
+# Flake for the USB installer only: its one product is a bootable
+# NixOS installer ISO. Builds natively on x86_64 Linux; on Apple
+# Silicon Macs use the nix-darwin linux-builder pattern (see the cluster
+# flake at https://github.com/Lucent-Financial-Group/Zeta/tree/main/full-ai-cluster).
+
+{
+  description = "Zeta USB installer — NixOS bootable image for AI-cluster bootstrap";
+
+  inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11";
+  inputs.flake-utils.url = "github:numtide/flake-utils";
+
+  outputs = inputs@{ self, nixpkgs, flake-utils, ... }:
+    let
+      stateVersion = "24.11";
+
+      # The only NixOS system defined here: the installer image itself.
+      installerSystem = nixpkgs.lib.nixosSystem {
+        system = "x86_64-linux";
+        specialArgs = { inherit inputs stateVersion; };
+        modules = [ ./nixos/installer/configuration.nix ];
+      };
+
+      # Per-platform outputs: the ISO package, a dev shell, a formatter.
+      perSystem = system:
+        let
+          pkgs = import nixpkgs { inherit system; };
+          isoImage = self.nixosConfigurations.installer.config.system.build.isoImage;
+        in
+        {
+          # `default` aliases the ISO so a bare `nix build` produces it.
+          packages.installer-iso = isoImage;
+          packages.default = isoImage;
+
+          devShells.default = pkgs.mkShell {
+            name = "zeta-usb-installer";
+            packages = [ pkgs.git pkgs.nix-output-monitor pkgs.nh ];
+          };
+
+          formatter = pkgs.nixpkgs-fmt;
+        };
+    in
+    { nixosConfigurations.installer = installerSystem; }
+    // flake-utils.lib.eachSystem [ "x86_64-linux" ] perSystem;
+}
diff --git a/full-ai-cluster/usb-nixos-installer/nixos/installer/configuration.nix b/full-ai-cluster/usb-nixos-installer/nixos/installer/configuration.nix
new file mode 100644
index 0000000000..8181c9fd5a
--- /dev/null
+++ b/full-ai-cluster/usb-nixos-installer/nixos/installer/configuration.nix
@@ -0,0 +1,204 @@
+# usb-nixos-installer/nixos/installer/configuration.nix
+#
+# Single-file declarative installer image. Contains ONLY what's
+# needed to boot a target machine and run `nixos-install --flake`
+# against a host config from this repo.
+
+{ config, pkgs, lib, modulesPath, ... }:
+
+{
+  imports = [
+    "${modulesPath}/installer/cd-dvd/installation-cd-minimal.nix"
+    "${modulesPath}/installer/cd-dvd/channel.nix"
+  ];
+
+  networking.hostName = "zeta-installer";
+  time.timeZone = "America/New_York";
+  i18n.defaultLocale = "en_US.UTF-8";
+
+  nix.settings = {
+    experimental-features = [ "nix-command" "flakes" ];
+    auto-optimise-store = true;
+    trusted-users = [ "root" "nixos" ];
+    substituters = [
+      "https://cache.nixos.org"
+      "https://nix-community.cachix.org"
+    ];
+    trusted-public-keys = [
+      "cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY="
+      "nix-community.cachix.org-1:mB9FSh9qf2dCimDSUo8Zy7bkq5CX+/rkCWyvRCYg3Fs="
+    ];
+  };
+
+  networking.networkmanager.enable = true;
+  networking.wireless.enable = lib.mkForce false;
+  networking.firewall.enable = true;
+
+  # SSH is disabled outright: with `enable = false` NixOS generates no sshd
+  # unit, so it cannot be started ad hoc. Headless installs need a rebuilt ISO.
+  services.openssh = {
+    enable = lib.mkForce false;
+    settings = { # inert while disabled; hardened values if re-enabled
+      PermitRootLogin = lib.mkForce "prohibit-password";
+      PasswordAuthentication = lib.mkForce false;
+      KbdInteractiveAuthentication = lib.mkForce false;
+    };
+  };
+
+  users.users.nixos = {
+    isNormalUser = true;
+    extraGroups = [ "wheel" "networkmanager" ];
+  };
+
+  environment.systemPackages = with pkgs; [
+    # Version control: pull the cluster flake onto the target
+    git
+    git-lfs
+    gnupg
+    openssh
+
+    # Editors
+    vim
+    neovim
+    nano
+
+    # Shell quality of life
+    bash
+    zsh
+    tmux
+    screen
+    htop
+    btop
+    tree
+    ripgrep
+    fd
+    fzf
+    bat
+    eza
+    jq
+    yq-go
+    less
+    file
+    which
+    unzip
+    zip
+    p7zip
+    rsync
+
+    # Network
+    curl
+    wget
+    iproute2
+    iputils
+    inetutils
+    dnsutils
+    nmap
+    tcpdump
+    mtr
+    ethtool
+    bind
+    networkmanager
+    iwd
+    wpa_supplicant
+    openvpn
+    wireguard-tools
+
+    # Disk / partitioning / filesystems
+    parted
+    gptfdisk
+    util-linux
+    cryptsetup
+    dosfstools
+    e2fsprogs
+    xfsprogs
+    btrfs-progs
+    zfs
+    lvm2
+    mdadm
+    smartmontools
+
+    # Hardware inspection
+    pciutils
+    usbutils
+    lshw
+    dmidecode
+    hwinfo
+    inxi
+    lm_sensors
+    nvme-cli
+    hdparm
+
+    # GPU detection (drivers come in per-host on installed system)
+    glxinfo
+    vulkan-tools
+    clinfo
+
+    # NixOS install tooling
+    nixos-install-tools
+    nix-output-monitor
+    nvd
+    nh
+
+    # Secrets management
+    age
+    sops
+    ssh-to-age
+
+    # Build helpers
+    coreutils
+    findutils
+    gawk
+    gnused
+    gnugrep
+    diffutils
+    patch
+    gcc
+    gnumake
+    pkg-config
+
+    # Observability of the install itself
+    iotop
+    iftop
+    ncdu
+    pv
+    progress
+
+    # Documentation on the stick
+    man-pages
+    man-pages-posix
+    tldr
+  ];
+
+  isoImage = {
+    isoName = lib.mkForce "zeta-installer-${config.system.nixos.release}.iso";
+    volumeID = lib.mkForce "ZETA_INSTALL";
+    makeEfiBootable = true;
+    makeUsbBootable = true;
+  };
+
+  environment.etc."zeta-install.md".text = ''
+    Zeta USB installer
+    ==================
+
+    1. Boot this USB on the target machine.
+    2. Log in at the console as `root` (no password — upstream
+       installer default; only usable from the local TTY).
+    3. Bring up the network:
+         nmtui                     # interactive, or
+         nmcli device wifi connect <SSID> password <password>
+    4. Identify the target disk:
+         lsblk
+    5. Partition + mount /mnt as desired.
+    6. Generate hardware config:
+         nixos-generate-config --root /mnt
+    7. Clone the full cluster flake (or this minimal USB flake):
+         git clone <repo-url> /mnt/etc/zeta
+    8. Install:
+         nixos-install --flake /mnt/etc/zeta/full-ai-cluster#<hostname>
+       or for USB-only:
+         nixos-install --flake /mnt/etc/zeta/usb-nixos-installer#installer
+    9. Reboot.
+  '';
+
+  system.stateVersion = "24.11";
+}
diff --git a/usb-nixos-installer/README.md b/usb-nixos-installer/README.md
new file mode 100644
index 0000000000..86d444c5c7
--- /dev/null
+++ b/usb-nixos-installer/README.md
@@ -0,0 +1,95 @@
+# usb-nixos-installer
+
+**Scope: ONLY the USB bootstrap portion.**
+
+This directory contains exactly the four things needed to produce a
+bootable NixOS USB installer that can install the target operating
+system on a new machine over USB or Ethernet:
+
+1. **NixOS declarative configuration** — `nixos/installer/configuration.nix`
+2. **NixFlakes for packages** — `flake.nix` at the directory root
+3. **Git for version text storage** — every file here lives in git;
+   `flake.nix` references inputs by Git branch. **Run
+   `nix flake update` and commit the resulting `flake.lock`** to
+   pin to specific revisions for fully-reproducible builds. The
+   lock file isn't committed yet (no maintainer with Nix has run
+   `nix flake update` on this branch yet); first maintainer to
+   build the ISO should commit it.
+4. **The OS Flake on a USB stick** — `nix build .#installer-iso`
+   produces a bootable ISO image you `dd` to a USB stick. The same
+   ISO supports Ethernet install (boot the target on the stick,
+   then `nixos-install --flake <flake-path>#<hostname>` over the network).
+
+**This directory is intentionally minimal.** It does NOT contain
+K3S, ArgoCD, Orleans, GitLab, observability, GPU runtime, or any
+cluster workload. Those live in the `full-ai-cluster/` directory
+at the repo root.
+
+For the full end-to-end AI cluster (including this USB bootstrap
+as its starting snippet), see
+<https://github.com/Lucent-Financial-Group/Zeta/tree/main/full-ai-cluster>.
+
+## Build the USB stick
+
+From any machine with Nix installed:
+
+```bash
+cd usb-nixos-installer
+nix build .#installer-iso
+# Output: result/iso/zeta-installer-*.iso (~1.5-2 GB)
+```
+
+## Write the ISO to a USB stick
+
+### macOS
+
+```bash
+diskutil list                     # find the USB device (e.g.
/dev/disk4)
+diskutil unmountDisk /dev/disk4   # replace 4 with your USB device number
+sudo dd if=result/iso/zeta-installer-*.iso of=/dev/rdisk4 bs=4m status=progress
+diskutil eject /dev/disk4
+```
+
+### Linux
+
+```bash
+lsblk                             # find the USB device (e.g. /dev/sdb)
+sudo dd if=result/iso/zeta-installer-*.iso of=/dev/sdb bs=4M status=progress conv=fsync
+sync
+```
+
+## Install on a target machine
+
+1. Boot the target on the USB stick.
+2. Log in at the console as `root` (no password — upstream NixOS
+   installer default; console-only).
+3. Bring up the network with `nmtui` (interactive) or
+   `nmcli device wifi connect <SSID> password <password>`.
+4. Identify the target disk with `lsblk`.
+5. Partition + mount as desired (parted/gptfdisk/cryptsetup/zfs
+   are all on the stick).
+6. Generate per-machine hardware config:
+   `nixos-generate-config --root /mnt`
+7. Install:
+   `nixos-install --flake <flake-path>#<hostname>` where `<hostname>` is one
+   of the names declared in `flake.nix` `nixosConfigurations`.
+   (This minimal installer only declares `installer` itself —
+   target-machine hosts live in `../full-ai-cluster/flake.nix`.)
+8. Reboot.
+
+## What's on the stick
+
+The complete package list lives in
+[`nixos/installer/configuration.nix`](nixos/installer/configuration.nix)
+under `environment.systemPackages`. Categories include:
+
+- Version control: git, git-lfs, gnupg, openssh
+- Editors: vim, neovim, nano
+- Shell QoL: tmux, htop, ripgrep, jq, yq-go, fzf, bat
+- Network: curl, wget, nmap, networkmanager, iwd, wireguard-tools
+- Disk: parted, gptfdisk, cryptsetup, zfs, lvm2, mdadm
+- Hardware inspection: lshw, dmidecode, nvme-cli, lm_sensors
+- NixOS install tooling: nixos-install-tools, nix-output-monitor
+
+The flake itself is the tick source. Every subsequent install
+reconciles toward the desired state declared here.
diff --git a/usb-nixos-installer/flake.nix b/usb-nixos-installer/flake.nix
new file mode 100644
index 0000000000..715791f2a9
--- /dev/null
+++ b/usb-nixos-installer/flake.nix
@@ -0,0 +1,46 @@
+# usb-nixos-installer/flake.nix
+#
+# Flake for the USB installer only: its one product is a bootable
+# NixOS installer ISO. Builds natively on x86_64 Linux; on Apple
+# Silicon Macs use the nix-darwin linux-builder pattern (see the cluster
+# flake at https://github.com/Lucent-Financial-Group/Zeta/tree/main/full-ai-cluster).
+
+{
+  description = "Zeta USB installer — NixOS bootable image for AI-cluster bootstrap";
+
+  inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11";
+  inputs.flake-utils.url = "github:numtide/flake-utils";
+
+  outputs = inputs@{ self, nixpkgs, flake-utils, ... }:
+    let
+      stateVersion = "24.11";
+
+      # The only NixOS system defined here: the installer image itself.
+      installerSystem = nixpkgs.lib.nixosSystem {
+        system = "x86_64-linux";
+        specialArgs = { inherit inputs stateVersion; };
+        modules = [ ./nixos/installer/configuration.nix ];
+      };
+
+      # Per-platform outputs: the ISO package, a dev shell, a formatter.
+      perSystem = system:
+        let
+          pkgs = import nixpkgs { inherit system; };
+          isoImage = self.nixosConfigurations.installer.config.system.build.isoImage;
+        in
+        {
+          # `default` aliases the ISO so a bare `nix build` produces it.
+          packages.installer-iso = isoImage;
+          packages.default = isoImage;
+
+          devShells.default = pkgs.mkShell {
+            name = "zeta-usb-installer";
+            packages = [ pkgs.git pkgs.nix-output-monitor pkgs.nh ];
+          };
+
+          formatter = pkgs.nixpkgs-fmt;
+        };
+    in
+    { nixosConfigurations.installer = installerSystem; }
+    // flake-utils.lib.eachSystem [ "x86_64-linux" ] perSystem;
+}
diff --git a/usb-nixos-installer/nixos/installer/configuration.nix b/usb-nixos-installer/nixos/installer/configuration.nix
new file mode 100644
index 0000000000..8181c9fd5a
--- /dev/null
+++ b/usb-nixos-installer/nixos/installer/configuration.nix
@@ -0,0 +1,204 @@
+# usb-nixos-installer/nixos/installer/configuration.nix
+#
+# Single-file declarative installer image. Contains ONLY what's
+# needed to boot a target machine and run `nixos-install --flake`
+# against a host config from this repo.
+
+{ config, pkgs, lib, modulesPath, ... }:
+
+{
+  imports = [
+    "${modulesPath}/installer/cd-dvd/installation-cd-minimal.nix"
+    "${modulesPath}/installer/cd-dvd/channel.nix"
+  ];
+
+  networking.hostName = "zeta-installer";
+  time.timeZone = "America/New_York";
+  i18n.defaultLocale = "en_US.UTF-8";
+
+  nix.settings = {
+    experimental-features = [ "nix-command" "flakes" ];
+    auto-optimise-store = true;
+    trusted-users = [ "root" "nixos" ];
+    substituters = [
+      "https://cache.nixos.org"
+      "https://nix-community.cachix.org"
+    ];
+    trusted-public-keys = [
+      "cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY="
+      "nix-community.cachix.org-1:mB9FSh9qf2dCimDSUo8Zy7bkq5CX+/rkCWyvRCYg3Fs="
+    ];
+  };
+
+  networking.networkmanager.enable = true;
+  networking.wireless.enable = lib.mkForce false;
+  networking.firewall.enable = true;
+
+  # SSH is disabled outright: with `enable = false` NixOS generates no sshd
+  # unit, so it cannot be started ad hoc. Headless installs need a rebuilt ISO.
+  services.openssh = {
+    enable = lib.mkForce false;
+    settings = { # inert while disabled; hardened values if re-enabled
+      PermitRootLogin = lib.mkForce "prohibit-password";
+      PasswordAuthentication = lib.mkForce false;
+      KbdInteractiveAuthentication = lib.mkForce false;
+    };
+  };
+
+  users.users.nixos = {
+    isNormalUser = true;
+    extraGroups = [ "wheel" "networkmanager" ];
+  };
+
+  environment.systemPackages = with pkgs; [
+    # Version control: pull the cluster flake onto the target
+    git
+    git-lfs
+    gnupg
+    openssh
+
+    # Editors
+    vim
+    neovim
+    nano
+
+    # Shell quality of life
+    bash
+    zsh
+    tmux
+    screen
+    htop
+    btop
+    tree
+    ripgrep
+    fd
+    fzf
+    bat
+    eza
+    jq
+    yq-go
+    less
+    file
+    which
+    unzip
+    zip
+    p7zip
+    rsync
+
+    # Network
+    curl
+    wget
+    iproute2
+    iputils
+    inetutils
+    dnsutils
+    nmap
+    tcpdump
+    mtr
+    ethtool
+    bind
+    networkmanager
+    iwd
+    wpa_supplicant
+    openvpn
+    wireguard-tools
+
+    # Disk / partitioning / filesystems
+    parted
+    gptfdisk
+    util-linux
+    cryptsetup
+    dosfstools
+    e2fsprogs
+    xfsprogs
+    btrfs-progs
+    zfs
+    lvm2
+    mdadm
+    smartmontools
+
+    # Hardware inspection
+    pciutils
+    usbutils
+    lshw
+    dmidecode
+    hwinfo
+    inxi
+    lm_sensors
+    nvme-cli
+    hdparm
+
+    # GPU detection (drivers come in per-host on installed system)
+    glxinfo
+    vulkan-tools
+    clinfo
+
+    # NixOS install tooling
+    nixos-install-tools
+    nix-output-monitor
+    nvd
+    nh
+
+    # Secrets management
+    age
+    sops
+    ssh-to-age
+
+    # Build helpers
+    coreutils
+    findutils
+    gawk
+    gnused
+    gnugrep
+    diffutils
+    patch
+    gcc
+    gnumake
+    pkg-config
+
+    # Observability of the install itself
+    iotop
+    iftop
+    ncdu
+    pv
+    progress
+
+    # Documentation on the stick
+    man-pages
+    man-pages-posix
+    tldr
+  ];
+
+  isoImage = {
+    isoName = lib.mkForce "zeta-installer-${config.system.nixos.release}.iso";
+    volumeID = lib.mkForce "ZETA_INSTALL";
+    makeEfiBootable = true;
+    makeUsbBootable = true;
+  };
+
+  environment.etc."zeta-install.md".text = ''
+    Zeta USB installer
+    ==================
+
+    1. Boot this USB on the target machine.
+    2. Log in at the console as `root` (no password — upstream
+       installer default; only usable from the local TTY).
+    3. Bring up the network:
+         nmtui                     # interactive, or
+         nmcli device wifi connect <SSID> password <password>
+    4. Identify the target disk:
+         lsblk
+    5. Partition + mount /mnt as desired.
+    6. Generate hardware config:
+         nixos-generate-config --root /mnt
+    7. Clone the full cluster flake (or this minimal USB flake):
+         git clone <repo-url> /mnt/etc/zeta
+    8. Install:
+         nixos-install --flake /mnt/etc/zeta/full-ai-cluster#<hostname>
+       or for USB-only:
+         nixos-install --flake /mnt/etc/zeta/usb-nixos-installer#installer
+    9. Reboot.
+  '';
+
+  system.stateVersion = "24.11";
+}