diff --git a/.github/workflows/build-installer-iso.yml b/.github/workflows/build-installer-iso.yml deleted file mode 100644 index 7500d6b4b1..0000000000 --- a/.github/workflows/build-installer-iso.yml +++ /dev/null @@ -1,230 +0,0 @@ -# .github/workflows/build-installer-iso.yml -# -# Builds the Zeta cluster installer ISO from infra/nixos/hosts/installer/ -# via the repo-root flake. Runs on every PR that touches the flake/infra, -# every push to main, and on release publish (to attach the ISO to the -# Release as a downloadable asset). -# -# Why on a Linux runner and not the existing macOS gate matrix: -# The ISO target is `x86_64-linux`. Building it on macOS requires the -# nix-darwin `linux-builder` VM (Apple Virtualization.framework + -# Rosetta 2). That works locally for maintainers, but the gate CI -# already runs on ubuntu-24.04 — building directly there is faster, -# cheaper, and uses no cross-compile. -# -# Discipline (per .github/workflows/gate.yml): -# - Runner pinned to ubuntu-24.04 (not -latest) -# - Third-party actions SHA-pinned with trailing # vX.Y.Z comments -# - permissions: contents: read at workflow level. The -# `attach-to-release` job elevates to `contents: write` only -# for itself (release-asset upload). -# - Concurrency: workflow-scoped, cancel-in-progress only for PRs -# - github.event.* values that may be attacker-controlled (release -# tag names, etc.) are passed via env: not interpolated into -# run: lines, per the GitHub Actions injection guide. 
- -name: build-installer-iso - -on: - pull_request: - types: [opened, reopened, synchronize, ready_for_review] - paths: - - 'flake.nix' - - 'flake.lock' - - 'infra/nixos/**' - - '.github/workflows/build-installer-iso.yml' - push: - branches: [main] - paths: - - 'flake.nix' - - 'flake.lock' - - 'infra/nixos/**' - - '.github/workflows/build-installer-iso.yml' - workflow_dispatch: - release: - types: [published] - -permissions: - contents: read - -concurrency: - group: build-installer-iso-${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.event_name == 'pull_request' }} - -jobs: - build: - name: build-iso - # Skip on release events — attach-to-release rebuilds at the tag so the - # asset matches the release exactly. Running both would build the ISO - # twice per release publish for no added verification. - if: github.event_name != 'release' - runs-on: ubuntu-24.04 - timeout-minutes: 60 - steps: - - name: Checkout - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - # Need full history so the flake.lock pinning is reproducible - # and any `git describe` style versioning works. - fetch-depth: 0 - - - name: Install Nix - # Determinate Systems Nix installer — same one maintainers use - # locally. Enables flakes + nix-command by default. - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 - - # NOTE: previously had `DeterminateSystems/magic-nix-cache-action` - # here but it now requires FlakeHub auth (FlakeHub.com account / - # organization registration), which the project doesn't have set - # up. Removed to unblock builds. First-build cost is ~10-15 min - # instead of ~3 min cached. Follow-up to add `actions/cache` on - # /nix/store or `nix-community/cache-nix-action` (no FlakeHub - # auth required) is tracked separately. 
- - - name: Show flake metadata - run: nix flake metadata --json | jq '{description, lastModified, revision}' - - - name: Check flake evaluates - # Cheap eval-only check — catches typos, missing imports, - # undefined attributes before paying for a full build. - run: nix flake check --no-build --show-trace - - - name: Build installer ISO - # The actual build. Produces result/iso/zeta-installer-*.iso. - run: nix build .#installer-iso --print-build-logs - - - name: Locate ISO + capture metadata - id: iso - run: | - set -euo pipefail - # Enforce single match. Multiple matches would be a substrate - # surprise (build layout changed?) and silently picking one - # is worse than failing loudly. - mapfile -t iso_candidates < <(find result/iso -maxdepth 1 -type f \( -name 'zeta-installer-*.iso' -o -name 'nixos-minimal-*.iso' \) | sort) - if [ "${#iso_candidates[@]}" -eq 0 ]; then - echo "::error::No installer ISO found under result/iso/ (looked for zeta-installer-*.iso OR nixos-minimal-*.iso per nixpkgs 25.11 default; see B-0818)" >&2 - ls -la result/iso/ >&2 || true - exit 1 - fi - if [ "${#iso_candidates[@]}" -gt 1 ]; then - echo "::error::Multiple installer ISOs under result/iso/; expected exactly one:" >&2 - printf ' %s\n' "${iso_candidates[@]}" >&2 - exit 1 - fi - iso_path="${iso_candidates[0]}" - iso_name=$(basename "$iso_path") - iso_size=$(stat -c%s "$iso_path" | numfmt --to=iec --suffix=B) - iso_sha256=$(sha256sum "$iso_path" | awk '{print $1}') - { - echo "path=$iso_path" - echo "name=$iso_name" - echo "size=$iso_size" - echo "sha256=$iso_sha256" - } >> "$GITHUB_OUTPUT" - { - echo "## Installer ISO built" - echo "" - echo "| Field | Value |" - echo "|---|---|" - echo "| File | \`$iso_name\` |" - echo "| Size | $iso_size |" - echo "| SHA256 | \`$iso_sha256\` |" - } >> "$GITHUB_STEP_SUMMARY" - - - name: Upload ISO as workflow artifact - # Available for download from the workflow run page for ~90 days. 
- # Anyone reviewing the PR can grab it and dd it to a USB stick - # without needing Nix installed locally. - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 - with: - name: ${{ steps.iso.outputs.name }} - path: ${{ steps.iso.outputs.path }} - if-no-files-found: error - retention-days: 90 - compression-level: 0 # ISO is already compressed; re-zipping wastes time - - attach-to-release: - name: attach-iso-to-release - # No `needs: build`. The build job is skipped on release events - # (see its own `if: github.event_name != 'release'`); declaring - # `needs: build` here would short-circuit attach-to-release to - # "skipped" too because skipped-by-`if` propagates through - # `needs:` by default. The two jobs are independent: attach-to- - # release builds the ISO itself at the release tag, with its - # own ref pinning. - if: github.event_name == 'release' - runs-on: ubuntu-24.04 - timeout-minutes: 30 - permissions: - contents: write # needed to upload the ISO as a release asset - steps: - - name: Checkout at the release tag - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - # Explicit ref pin: on release events GITHUB_REF defaults - # to the tag, but pinning explicitly is defense in depth - # against payload variation (and reads more clearly). - ref: ${{ github.event.release.tag_name }} - # Match the build job: full history + tags so `git describe` - # style versioning and flake.lock pinning are reproducible at - # the tag. Default fetch-depth: 1 omits tags/history and risks - # silent drift on release builds. - fetch-depth: 0 - - - name: Install Nix - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 - - # magic-nix-cache-action removed — requires FlakeHub auth not yet - # set up. See note in the build job above. - - - name: Rebuild ISO for the tagged release - # Re-builds at the tag so the ISO ships exactly the source the - # release advertises. 
The build job above already produced one, - # but artifacts and release assets are different stores. - run: nix build .#installer-iso --print-build-logs - - - name: Upload ISO + SHA256 to the release - # Release tag_name is set by whoever created the release — - # treated as attacker-controlled per the GH Actions injection - # guide. Passed via env, never interpolated into the shell. - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - RELEASE_TAG: ${{ github.event.release.tag_name }} - run: | - set -euo pipefail - # Enforce single match — release assets are publicly downloaded - # and silently picking one when multiple exist would be worse - # than failing loudly. - mapfile -t iso_candidates < <(find result/iso -maxdepth 1 -type f \( -name 'zeta-installer-*.iso' -o -name 'nixos-minimal-*.iso' \) | sort) - if [ "${#iso_candidates[@]}" -eq 0 ]; then - echo "::error::No installer ISO found under result/iso/ (looked for zeta-installer-*.iso OR nixos-minimal-*.iso per nixpkgs 25.11 default; see B-0818)" >&2 - ls -la result/iso/ >&2 || true - exit 1 - fi - if [ "${#iso_candidates[@]}" -gt 1 ]; then - echo "::error::Multiple installer ISOs under result/iso/; refusing to upload an arbitrary one to release ${RELEASE_TAG}:" >&2 - printf ' %s\n' "${iso_candidates[@]}" >&2 - exit 1 - fi - iso_path="${iso_candidates[0]}" - # iso_path resolves into /nix/store/... via the result/ symlink, - # which is read-only. Write the .sha256 sidecar to RUNNER_TEMP - # (writable) so the upload step doesn't EROFS. - iso_name=$(basename "$iso_path") - sha256_path="${RUNNER_TEMP:-/tmp}/${iso_name}.sha256" - # Use the standard `sha256sum` format (` `) - # so consumers can verify with `sha256sum --check`. - ( cd "$(dirname "$iso_path")" && sha256sum "$iso_name" ) > "$sha256_path" - # Refuse to upload if RELEASE_TAG looks like a flag-injection - # vector. Tag names can legally start with `-`, which would - # make `gh release upload "$RELEASE_TAG" ...` parse the tag as - # a flag. 
Hard-fail if so — release operators should rename. - case "$RELEASE_TAG" in - -*) echo "::error::RELEASE_TAG starts with '-' which is a flag-injection risk: $RELEASE_TAG" >&2; exit 1 ;; - esac - # `--` separator: belt + suspenders defense against any future - # argv-injection vector even if the tag-name check above is - # bypassed somehow. - gh release upload --clobber -- "$RELEASE_TAG" \ - "$iso_path" \ - "$sha256_path" diff --git a/docs/BACKLOG.md b/docs/BACKLOG.md index fa0656155d..20297c7b72 100644 --- a/docs/BACKLOG.md +++ b/docs/BACKLOG.md @@ -886,5 +886,6 @@ are closed (status: closed in frontmatter)._ - [ ] **[B-0756](backlog/P3/B-0756-ha-control-plane-multi-master-embedded-etcd-quorum-and-stable-api-endpoint-aaron-2026-05-25.md)** HA control-plane — multi-master k3s embedded etcd quorum + stable API endpoint (DNS round-robin or kube-vip virtual IP) - [ ] **[B-0757](backlog/P3/B-0757-cluster-auto-discovery-mdns-bootstrap-or-join-unattended-growth-aaron-2026-05-25.md)** Cluster auto-discovery — mDNS bootstrap-or-join so 1st/2nd/3rd/Nth USB self-organizes into a growing cluster unattended - [ ] **[B-0788](backlog/P3/B-0788-agent-on-agent-claude-code-session-recovery-lift-operator-runs-gate-once-zeta-safety-substrate-stronger-than-classifier-2026-05-25.md)** Agent-on-agent Claude Code session recovery — lift operator-runs gate for `--apply` once Zeta safety substrate stronger than classifier +- [ ] **[B-0830](backlog/P3/B-0830-add-iso-release-attach-to-build-ai-cluster-iso-workflow-when-zeta-starts-tagging-releases-aaron-2026-05-26.md)** Add ISO release-attach to build-ai-cluster-iso.yml workflow when Zeta starts tagging releases diff --git a/docs/backlog/P3/B-0830-add-iso-release-attach-to-build-ai-cluster-iso-workflow-when-zeta-starts-tagging-releases-aaron-2026-05-26.md b/docs/backlog/P3/B-0830-add-iso-release-attach-to-build-ai-cluster-iso-workflow-when-zeta-starts-tagging-releases-aaron-2026-05-26.md new file mode 100644 index 0000000000..5b79da98b5 --- 
/dev/null +++ b/docs/backlog/P3/B-0830-add-iso-release-attach-to-build-ai-cluster-iso-workflow-when-zeta-starts-tagging-releases-aaron-2026-05-26.md @@ -0,0 +1,115 @@ +--- +id: B-0830 +title: Add ISO release-attach to build-ai-cluster-iso.yml workflow when Zeta starts tagging releases +status: open +priority: P3 +created: 2026-05-26 +last_updated: 2026-05-26 +depends_on: [] +composes_with: [] +--- + +# B-0830 — Add ISO release-attach to build-ai-cluster-iso.yml workflow (when Zeta starts tagging releases) (Aaron 2026-05-26) + +## Scope + +Add a `release: types: [published]` trigger + attach-to-release job to `.github/workflows/build-ai-cluster-iso.yml` so that when Zeta starts tagging GitHub releases, the AI-cluster installer ISO gets automatically built at the release tag + attached to the release as a downloadable asset. + +## Why this is a follow-up row (not done in USB cleanup PR 2) + +USB cleanup PR 2 retired the legacy `build-installer-iso.yml` workflow which had a release-attach feature. The canonical `build-ai-cluster-iso.yml` does NOT have the same feature. PR 2's substrate-honest scope was deletion-of-legacy; adding release-attach to the canonical workflow is its own focused engineering work + can wait until Zeta starts tagging releases (currently zero releases per `gh release list` 2026-05-26). + +## What the legacy workflow had (reference implementation) + +From `.github/workflows/build-installer-iso.yml` (deleted in USB cleanup PR 2): + +```yaml +on: + release: + types: [published] + +jobs: + build-iso: + if: github.event_name != 'release' # release uses attach-to-release-job below + # ... existing build steps + + attach-to-release: + if: github.event_name == 'release' + permissions: + contents: write # elevated for release-asset upload + # ... 
checkout at release tag, build ISO, upload as release asset +``` + +Pattern: + +- Regular PR / push / workflow_dispatch builds the ISO + uploads as workflow artifact (current state) +- Release `published` event triggers a separate job that rebuilds the ISO AT the release tag commit + uploads as release asset (so the release has a downloadable installer) +- Skip the regular build-iso job on release events (avoid building twice) + +## Acceptance + +Core trigger + job structure: + +- [ ] `release: types: [published]` trigger added to `build-ai-cluster-iso.yml` +- [ ] `attach-to-release` job added with `permissions: contents: write` (elevated only for that job) +- [ ] Job checks out at release tag SHA, builds canonical ISO, uploads as release asset +- [ ] Skip existing build-iso job on release events (no duplicate builds) + +Security + reliability safeguards (must preserve from deleted legacy workflow): + +- [ ] **Reject release tags starting with `-`** — prevents tag-name injection into `gh release upload` argument list (a tag like `-flag` could be interpreted as a flag rather than a positional): + + ```bash + if [[ "${RELEASE_TAG}" == -* ]]; then + echo "::error::Release tag starts with '-' which would be ambiguous as gh CLI argument; aborting" + exit 1 + fi + ``` + +- [ ] **Use `--` separator for `gh release upload`** — disambiguates tag-name + filename from flag arguments even when tag passes the leading-dash check: + + ```bash + gh release upload -- "${RELEASE_TAG}" "${ISO_PATH}" "${SHA256_SIDECAR_PATH}" + ``` + +- [ ] **Write SHA256 sidecar OUTSIDE the read-only Nix store** — the ISO at `result/iso/zeta-installer-*.iso` resolves into `/nix/store/...` (via the `result` symlink), which is read-only.
The SHA256 sidecar (`.sha256`) must be written to a workflow-controlled writable directory (e.g., `$GITHUB_WORKSPACE` or `$RUNNER_TEMP`): + + ```bash + ISO_RESOLVED=$(readlink -f result/iso/zeta-installer-*.iso) + ISO_NAME=$(basename "${ISO_RESOLVED}") + SHA256_DIR="${RUNNER_TEMP}/iso-release" + mkdir -p "${SHA256_DIR}" + cp "${ISO_RESOLVED}" "${SHA256_DIR}/${ISO_NAME}" + ( cd "${SHA256_DIR}" && sha256sum "${ISO_NAME}" > "${ISO_NAME}.sha256" ) + ``` + + Then upload from `${SHA256_DIR}` not from `result/iso/`. The `zeta-installer-*.iso` glob must match exactly one file — fail the job on zero or multiple matches, as the legacy workflow did, rather than uploading an arbitrary ISO. + +Discipline + injection-safety (per existing `build-ai-cluster-iso.yml` patterns): + +- [ ] Runner pinned to `ubuntu-24.04` (not `-latest`) +- [ ] Third-party actions SHA-pinned with trailing `# vX.Y.Z` comments +- [ ] Concurrency groups (cancel-in-progress only for PRs, NOT for releases) +- [ ] No `github.event.*` values interpolated into `run:` lines — pass via `env:` per the GitHub Actions script-injection guide +- [ ] `permissions: contents: read` at workflow level; elevate to `contents: write` only on the `attach-to-release` job + +Test: + +- [ ] Tag a small release (e.g., `v0.0.1-test`) + verify ISO + sidecar attached + then untag +- [ ] Negative-test: tag a `-malicious` name + verify it aborts cleanly + +## Out of scope + +- Decision to start tagging releases at all (separate governance question) +- Release-versioning scheme (semver vs date-based; separate row when releases start) +- Release notes generation (separate row) + +## Composes with + +- `.github/workflows/build-ai-cluster-iso.yml` (the workflow this extends) +- `full-ai-cluster/usb-nixos-installer/` (the canonical AI-cluster installer substrate) +- USB cleanup PR 2 (this row's origin) + +## Origin + +USB cleanup PR 2 (2026-05-26) retired `build-installer-iso.yml`; the release-attach feature it carried was UNUSED at deletion time (zero existing releases per `gh release list`) but the capability is worth preserving for when Zeta starts tagging releases.
This row tracks the canonical-workflow re-implementation. diff --git a/flake.nix b/flake.nix index 01efbe7205..8a1bb287ad 100644 --- a/flake.nix +++ b/flake.nix @@ -6,25 +6,28 @@ # # Bootstrap flow: # 1. Clone Zeta onto a workstation: git clone https://github.com/Lucent-Financial-Group/Zeta -# 2. Build the USB installer ISO: nix build .#installer-iso -# 3. dd the ISO to a USB stick: sudo dd if=result/iso/zeta-installer-*.iso of=/dev/sdX bs=4M status=progress +# 2. Build the USB installer ISO: cd full-ai-cluster/usb-nixos-installer && nix build .#installer-iso +# 3. Flash to USB (macOS): bun full-ai-cluster/tools/zflash.ts +# (Linux/Windows fallback: sudo dd if=result/iso/zeta-installer-*.iso of=/dev/sdX bs=4M status=progress) # 4. Boot a target machine on the stick. # 5. From the live system: nixos-install --flake /mnt/etc/zeta# # 6. Reboot. K3S + ArgoCD + Orleans land automatically from this flake. # # Companion files: -# infra/nixos/hosts/installer/configuration.nix — packages on the USB -# infra/nixos/hosts//configuration.nix — per-machine config -# infra/nixos/modules/*.nix — shared modules -# infra/k8s/applications/*/Application.yaml — ArgoCD App-of-Apps +# full-ai-cluster/usb-nixos-installer/ — canonical AI-cluster installer ISO substrate +# infra/nixos/hosts//configuration.nix — per-machine config +# infra/nixos/modules/*.nix — shared modules +# infra/k8s/applications/*/Application.yaml — ArgoCD App-of-Apps { description = "Zeta — declarative desired state for the AI cluster (NixOS + K3S + ArgoCD + Orleans)"; inputs = { - # Pin nixpkgs to the stable channel that the installer's - # system.stateVersion targets (24.11). Bump in lockstep with - # infra/nixos/hosts/installer/configuration.nix `system.stateVersion`. + # Pin nixpkgs to the stable channel. 
+ # (Canonical AI-cluster installer at full-ai-cluster/usb-nixos-installer/ + # uses nixos-25.11 independently; this root flake stays on 24.11 for + # the per-host nixosConfigurations until those are bumped as a + # separate substrate landing.) nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11"; # Hardware-specific NixOS modules (e.g. common-cpu-amd, common-gpu-nvidia) @@ -67,22 +70,13 @@ # x86_64-darwin (Intel Macs) intentionally excluded: Rosetta 2 is # Apple-Silicon-only, and we don't ship a darwinConfiguration for # Intel Macs. Maintainers on Intel Macs use the CI workflow - # (.github/workflows/build-installer-iso.yml) to build the ISO. + # (.github/workflows/build-ai-cluster-iso.yml) to build the ISO. supportedSystems = [ "x86_64-linux" "aarch64-linux" "aarch64-darwin" ]; - # Systems that can produce the installer-iso package. - # x86_64-linux — native build (CI runners, Linux maintainers) - # aarch64-darwin — dispatched via nix-darwin linux-builder VM - # (configured at infra/nix-darwin/configuration.nix) - isoBuildSystems = [ - "x86_64-linux" - "aarch64-darwin" - ]; - # Helper that wires up a NixOS system with shared specialArgs so # every host module can reference `inputs`, `stateVersion`, and # the `nixos-hardware` collection. @@ -101,16 +95,12 @@ # Install on a fresh machine: nixos-install --flake /mnt/etc/zeta# # Switch a running machine: sudo nixos-rebuild switch --flake .# # - # The `installer` config builds a bootable ISO image rather than a - # target-machine system. Use the `.#installer-iso` packages alias - # declared in flake-utils.eachSystem below. + # Installer ISO retired from root flake 2026-05-26 (USB cleanup PR 2): + # canonical AI-cluster installer now lives at + # full-ai-cluster/usb-nixos-installer/ and is built via the dedicated + # build-ai-cluster-iso.yml workflow. Per the human maintainer's + # "get rid of the old" cleanup direction. 
nixosConfigurations = { - installer = mkSystem { - modules = [ - ./infra/nixos/hosts/installer/configuration.nix - ]; - }; - control-plane = mkSystem { modules = [ ./infra/nixos/hosts/control-plane/configuration.nix @@ -166,23 +156,12 @@ pkgs = import nixpkgs { inherit system; }; in { - # The installer ISO is built from an x86_64-linux NixOS config. - # Published on: - # - x86_64-linux — native build (CI runners) - # - aarch64-darwin — Apple Silicon maintainers; dispatches - # via nix-darwin's linux-builder VM - # - x86_64-darwin — Intel Mac maintainers (same path) - # NOT published on aarch64-linux (would attempt a cross-build - # that fails at evaluation; no use case yet). - packages = nixpkgs.lib.optionalAttrs (builtins.elem system isoBuildSystems) { - # Convenience alias for the installer ISO. - # Build with: nix build .#installer-iso - # Result at: ./result/iso/zeta-installer-*.iso - installer-iso = - self.nixosConfigurations.installer.config.system.build.isoImage; - - default = self.packages.${system}.installer-iso; - }; + # installer-iso package retired from root flake 2026-05-26 + # (USB cleanup PR 2). Canonical AI-cluster ISO now lives at + # full-ai-cluster/usb-nixos-installer/ and is built via: + # cd full-ai-cluster/usb-nixos-installer && nix build .#installer-iso + # CI workflow: .github/workflows/build-ai-cluster-iso.yml + packages = { }; # --------------------------------------------------------------------- # devShells — `nix develop` to get a shell with cluster admin tools @@ -219,7 +198,7 @@ shellHook = '' echo "zeta-admin devShell ready." 
- echo " Build installer ISO: nix build .#installer-iso" + echo " Build installer ISO: cd full-ai-cluster/usb-nixos-installer && nix build .#installer-iso" echo " Build host system: nixos-rebuild build --flake .#" echo " Talk to cluster: kubectl / k9s / argocd / helm" ''; diff --git a/infra/README.md b/infra/README.md index d20bfc2759..4dc11608d3 100644 --- a/infra/README.md +++ b/infra/README.md @@ -35,14 +35,20 @@ infra/ ### 1. Build the installer ISO ```bash -# From any machine with Nix installed: -nix build .#installer-iso +# From any machine with Nix installed (canonical AI-cluster installer +# substrate at full-ai-cluster/usb-nixos-installer/ — root-flake +# installer-iso package retired 2026-05-26 in USB cleanup PR 2): +cd full-ai-cluster/usb-nixos-installer && nix build .#installer-iso # Output at result/iso/zeta-installer-*.iso ``` ### 2. Write it to a USB stick ```bash +# macOS (recommended; run from the repo root): zflash — Touch ID + random nonce + SSH key auto-inject +bun full-ai-cluster/tools/zflash.ts + +# Linux / Windows fallback (run from full-ai-cluster/usb-nixos-installer/, where step 1 created result/): sudo dd if=result/iso/zeta-installer-*.iso of=/dev/sdX bs=4M status=progress conv=fsync ``` diff --git a/infra/nix-darwin/README.md b/infra/nix-darwin/README.md index ad8d39636b..99101c1f60 100644 --- a/infra/nix-darwin/README.md +++ b/infra/nix-darwin/README.md @@ -4,8 +4,9 @@ nix-darwin configuration for maintainer Macs (Apple Silicon). The reason this directory exists is **one feature**: `nix.linux-builder`. It spins up a tiny Linux VM via Apple's Virtualization.framework that -Nix dispatches Linux builds to — so `nix build .#installer-iso` works -locally on an M-series Mac without Parallels, Lima, Docker, or a +Nix dispatches Linux builds to — so the canonical installer ISO build +(`cd full-ai-cluster/usb-nixos-installer && nix build .#installer-iso`) +works locally on an M-series Mac without Parallels, Lima, Docker, or a remote builder.
## Prerequisites @@ -42,8 +43,10 @@ What it does: From the Zeta repo root: ```bash -nix build .#installer-iso -# ↓ writes result/iso/zeta-installer-24.11.iso (~1.5-2 GB) +# Canonical AI-cluster installer substrate (root-flake installer-iso +# package retired 2026-05-26 in USB cleanup PR 2): +cd full-ai-cluster/usb-nixos-installer && nix build .#installer-iso +# ↓ writes result/iso/zeta-installer-25.11.iso (~1.5-2 GB) ``` First build takes ~10-15 min (downloads dependencies, boots the @@ -77,6 +80,6 @@ That picks up newer linux-builder VM images + any nixpkgs bumps. or not any maintainer has nix-darwin set up. This is purely a workstation convenience for building the ISO locally. - **NOT a replacement for the CI build.** The - [`build-installer-iso.yml`](../../.github/workflows/build-installer-iso.yml) + [`build-ai-cluster-iso.yml`](../../.github/workflows/build-ai-cluster-iso.yml) workflow stays the source of truth for "this PR's ISO" — local builds are for iteration, not for distribution. diff --git a/infra/nix-darwin/configuration.nix b/infra/nix-darwin/configuration.nix index 57ff38686d..357b895bbc 100644 --- a/infra/nix-darwin/configuration.nix +++ b/infra/nix-darwin/configuration.nix @@ -1,8 +1,9 @@ # infra/nix-darwin/configuration.nix # # nix-darwin host configuration for maintainer Macs (Apple Silicon). -# Activates the Linux builder VM so `nix build .#installer-iso` works -# locally on Apple Silicon without manual cross-compile gymnastics. +# Activates the Linux builder VM so the canonical AI-cluster installer +# ISO build (`cd full-ai-cluster/usb-nixos-installer && nix build .#installer-iso`) +# works locally on Apple Silicon without manual cross-compile gymnastics. # # Apply on a Mac that already has Nix installed. 
Recommended installer: # the Determinate Nix macOS package at @@ -13,9 +14,10 @@ # nix run nix-darwin/nix-darwin-24.11#darwin-rebuild -- switch \ # --flake /path/to/Zeta#zeta-mac # -# After the first switch, `nix build .#installer-iso` from the Zeta -# repo root builds the x86_64-linux ISO via Apple Virtualization.framework +# After the first switch, the canonical AI-cluster ISO builds from +# full-ai-cluster/usb-nixos-installer/ via Apple Virtualization.framework # + Rosetta 2 — no Parallels, no Lima, no remote builder. +# (Root-flake installer-iso package retired 2026-05-26 in USB cleanup PR 2.) { config, pkgs, lib, ... }: diff --git a/infra/nixos/hosts/installer/configuration.nix b/infra/nixos/hosts/installer/configuration.nix deleted file mode 100644 index 48c5c9f2c4..0000000000 --- a/infra/nixos/hosts/installer/configuration.nix +++ /dev/null @@ -1,296 +0,0 @@ -# infra/nixos/hosts/installer/configuration.nix -# -# Single-file declarative desired state for the USB-stick (or netboot) -# NixOS installer image used to bootstrap every machine in the AI cluster. -# -# Built by the flake at the repo root: -# nix build .#nixosConfigurations.installer.config.system.build.isoImage -# Then `dd` the produced ISO to a USB stick, boot the target machine, -# clone this repo, and run `nixos-install --flake .#` against the -# desired host config (control-plane, worker-gpu-01, ...). -# -# Scope: ONLY packages needed ON THE STICK to reach the point where -# `nixos-install` can take over and pull everything else (K3S, ArgoCD, -# Orleans, GitLab, Argo Workflows, Argo Rollouts) from the flake. -# Do NOT add cluster runtime packages here — they belong in the -# per-host modules under infra/nixos/modules/. - -{ config, pkgs, lib, modulesPath, ... }: - -{ - imports = [ - # Use the upstream minimal installation CD as a base. - # Gives us a working live system, getty, nix, and the installer plumbing. 
- "${modulesPath}/installer/cd-dvd/installation-cd-minimal.nix" - "${modulesPath}/installer/cd-dvd/channel.nix" - ]; - - # --------------------------------------------------------------------------- - # Identity - # --------------------------------------------------------------------------- - networking.hostName = "zeta-installer"; - time.timeZone = "America/New_York"; - i18n.defaultLocale = "en_US.UTF-8"; - - # --------------------------------------------------------------------------- - # Nix / Flakes - # --------------------------------------------------------------------------- - nix.settings = { - experimental-features = [ "nix-command" "flakes" ]; - auto-optimise-store = true; - # Trust the live user so `nixos-install --flake` works without sudo dance - trusted-users = [ "root" "nixos" ]; - # Big public caches so installs are fast even from the stick - substituters = [ - "https://cache.nixos.org" - "https://nix-community.cachix.org" - ]; - trusted-public-keys = [ - "cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=" - "nix-community.cachix.org-1:mB9FSh9qf2dCimDSUo8Zy7bkq5CX+/rkCWyvRCYg3Fs=" - ]; - }; - - # --------------------------------------------------------------------------- - # Networking — wired + wireless so any machine can phone home to clone Zeta - # --------------------------------------------------------------------------- - networking.networkmanager.enable = true; - networking.wireless.enable = lib.mkForce false; # NM handles wifi instead - # Firewall ON by default. The installer ISO is a live system that often - # boots on networks we don't control (hotel wifi, conference LAN, home - # router with port-forwarding); leaving it firewalled keeps the install - # session from being trivially probed. - networking.firewall.enable = true; - - # SSH is OFF by default — installer is intended for console use. 
Enable
-  # manually on the live system for headless installs:
-  #
-  #   sudo systemctl start sshd     # start the service
-  #
-  # Key-only is enforced when enabled — never password auth, never root
-  # password login — so a login also needs a public key. For pre-seeded
-  # headless installs, drop the maintainer SSH key into
-  # `users.users.nixos.openssh.authorizedKeys.keys` here before building
-  # the ISO; otherwise append one to ~nixos/.ssh/authorized_keys on the
-  # live system. (Setting a password with `sudo passwd nixos` does NOT
-  # enable SSH password login.)
-  services.openssh = {
-    # The service stays *defined* so the sshd unit exists on the live
-    # system and the manual `systemctl start sshd` above actually works.
-    # (With `enable = false` NixOS generates no sshd unit at all, so the
-    # documented manual start could never succeed.) This is the same value
-    # the upstream installation-cd-minimal.nix sets, so no mkForce is
-    # needed and there is no conflicting-definition eval error.
-    enable = true;
-    settings = {
-      # mkForce: override the upstream installer defaults so the hardened
-      # values win regardless of module merge order.
-      PermitRootLogin = lib.mkForce "prohibit-password";
-      PasswordAuthentication = lib.mkForce false;
-      KbdInteractiveAuthentication = lib.mkForce false;
-    };
-  };
-
-  # Console-only by default: detach sshd from every boot target so it is
-  # never started automatically. The unit remains available for an
-  # explicit `systemctl start sshd` (per the no-credentials-in-Git
-  # security posture documented above).
-  systemd.services.sshd.wantedBy = lib.mkForce [ ];
-
-  # No hard-coded credentials. The upstream installation-cd-minimal.nix
-  # imported above ships with a passwordless root account usable ONLY from
-  # the local console — the secure default for an ephemeral live system.
-  # If you need a non-root user, set its password manually on the live
-  # system with `passwd`. Sudo requires a password (default policy).
-  # NOTE(review): the upstream installer profile may relax sudo for the
-  # wheel group (`security.sudo.wheelNeedsPassword`) — confirm before
-  # relying on the password prompt.
-  users.users.nixos = {
-    isNormalUser = true;
-    extraGroups = [ "wheel" "networkmanager" ];
-    # initialPassword intentionally unset — see comment above.
-  };
-
-  # ---------------------------------------------------------------------------
-  # THE PACKAGE LIST — everything that must be on the USB stick
-  # ---------------------------------------------------------------------------
-  environment.systemPackages = with pkgs; [
-
-    # --- Version control: pull the Zeta flake onto the target ---------------
-    git
-    git-lfs
-    gnupg
-    openssh
-
-    # --- Editors (pick your poison; ship all three, they're tiny) -----------
-    vim
-    neovim
-    nano
-
-    # --- Shell quality of life ----------------------------------------------
-    bash
-    zsh
-    tmux
-    screen
-    htop
-    btop
-    tree
-    ripgrep
-    fd
-    fzf
-    bat
-    eza
-    jq
-    yq-go
-    less
-    file
-    which
-    unzip
-    zip
-    p7zip
-    rsync
-
-    # --- Network: reach the internet, GitHub, and the local LAN -------------
-    curl
-    wget
-    iproute2  # ip(8)
-    iputils  # ping
-    inetutils  # traceroute, telnet
-    dnsutils  # dig, nslookup
-    nmap
-    tcpdump
-    mtr
-    ethtool
-    bind  # host
-    networkmanager  # nmcli/nmtui
-    iwd  # wifi backend for NM
-    wpa_supplicant  # fallback wifi
-    openvpn  # in case the install network needs VPN
-    wireguard-tools
-
-    # --- Disk / partitioning / filesystems ----------------------------------
-    parted
-    gptfdisk  # sgdisk
-    util-linux  # fdisk, lsblk, blkid, wipefs
-    cryptsetup  # LUKS for encrypted root
-    dosfstools  # FAT32 for EFI
-    e2fsprogs  # ext4
-    xfsprogs
-    btrfs-progs
-    zfs  # ZFS root is common on NixOS AI rigs
-    lvm2
-    mdadm  # software RAID
-    smartmontools  # disk health before committing
-
-    # --- Hardware inspection (know your machine before installing) ----------
-    pciutils  # lspci
-    usbutils  # lsusb
-    lshw
-    dmidecode
-    hwinfo
-    inxi
-    lm_sensors
-    nvme-cli
-    hdparm
-
-    # --- GPU detection (NVIDIA + AMD; drivers come in per-host gpu.nix) -----
-    glxinfo
-    vulkan-tools
-    clinfo
-
-    # --- NixOS install tooling (already in the base image, listed for
-    #     discoverability) ---------------------------------------------------
-    nixos-install-tools
-    nix-output-monitor  # `nom` — prettier nix build output
-    nvd  # nix version diff
-    nh  # friendly nixos rebuild wrapper
-
-    # --- Kubernetes / GitOps clients on the stick so you can poke a
-    #     just-installed control plane from the live USB before reboot ------
-    kubectl
-    kubernetes-helm
-    k9s
-    argocd  # ArgoCD CLI
-    k3s  # binary present so the installer can pre-seed if wanted
-
-    # --- Container runtime tooling (debug only on the stick) ----------------
-    skopeo
-    crane
-
-    # --- Secrets management (so encrypted secrets in the flake can be
-    #     decrypted during install) -----------------------------------------
-    age
-    sops
-    ssh-to-age
-
-    # --- Build-time helpers you'll inevitably want -------------------------
-    coreutils
-    findutils
-    gawk
-    gnused
-    gnugrep
-    diffutils
-    patch
-    gcc  # bootstrap compiler if a flake input needs it
-    gnumake
-    pkg-config
-
-    # --- Observability of the install itself --------------------------------
-    iotop
-    iftop
-    ncdu
-    pv
-    progress
-
-    # --- Documentation on the stick (no internet? still readable) -----------
-    man-pages
-    man-pages-posix
-    tldr
-  ];
-
-  # ---------------------------------------------------------------------------
-  # ISO branding
-  # ---------------------------------------------------------------------------
-  isoImage = {
-    # mkForce on the name and volume label so these values replace any
-    # definition made by the imported installer modules.
-    isoName = lib.mkForce "zeta-installer-${config.system.nixos.release}.iso";
-    volumeID = lib.mkForce "ZETA_INSTALL";
-    makeEfiBootable = true;
-    makeUsbBootable = true;
-  };
-
-  # Install-runbook baked onto the stick at /etc/zeta-install.md so it's
-  # reachable from the live system even when offline.
-  #
-  # NOTE: this writes documentation only. The Zeta flake itself is NOT
-  # auto-staged on the ISO — clone it from network during install
-  # (`git clone https://github.com/Lucent-Financial-Group/Zeta /mnt/etc/zeta`).
-  # An always-on flake-bundling pass would require build-time access to
-  # the flake source from this module's evaluation, which a future
-  # `flake.nix` at the repo root will wire via `imports = [ ./infra/... ]`
-  # plus `environment.etc."zeta".source = inputs.self;` — track in a
-  # follow-up PR.
-  #
-  # The text below is rendered verbatim to /etc/zeta-install.md on the live
-  # system. `<SSID>`, `<password>` and `<host>` are placeholders the
-  # operator substitutes by hand; they are plain text inside the Nix
-  # indented string (only `${...}` would be interpolated).
-  environment.etc."zeta-install.md".text = ''
-    Zeta cluster installer
-    ======================
-
-    1. Boot this USB on the target machine.
-    2. Log in at the console as `root` (no password — upstream installer
-       default; only usable from the local TTY).
-    3. Bring up the network (NetworkManager is enabled):
-         nmtui                      # interactive, or
-         nmcli device wifi connect <SSID> password <password>
-    4. Identify the target disk:
-         lsblk
-    5. Clone Zeta onto /mnt/etc/zeta after partitioning:
-         git clone https://github.com/Lucent-Financial-Group/Zeta /mnt/etc/zeta
-    6. Generate hardware config for this machine + copy it into the
-       per-host directory so the install picks it up:
-         nixos-generate-config --root /mnt
-         cp /mnt/etc/nixos/hardware-configuration.nix \
-            /mnt/etc/zeta/infra/nixos/hosts/<host>/hardware-configuration.nix
-       (commit the per-host hardware-configuration.nix after install
-        so future rebuilds reproduce the same boot environment.)
-    7. Install:
-         nixos-install --flake /mnt/etc/zeta#<host>
-       — where <host> is one of the names declared in the repo-root
-       `flake.nix` `nixosConfigurations`:
-         installer       — this USB ISO config (not for target install)
-         control-plane   — K3S server + ArgoCD bootstrap
-         worker-gpu-01   — NVIDIA AI worker (joins control-plane)
-         worker-gpu-02   — NVIDIA AI worker (joins control-plane)
-    8. Reboot. K3S, ArgoCD, Orleans land automatically from the flake.
-
-    The flake itself is the tick source. Everything downstream reconciles
-    toward the desired state declared in Git.
-  '';
-
-  # NixOS release that this installer targets.
-  system.stateVersion = "24.11";
-}