Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions .github/workflows/build-ai-cluster-iso.yml
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,41 @@ jobs:
# workflow run hangs).
bun tools/ci/audit-installer-iso-content.ts --iso "$iso_abs"

# QEMU boot smoke-test (USB cleanup PR 3 — 2026-05-26):
# Boots the built ISO in QEMU/KVM with serial console capture +
# asserts the installer's expected login prompt appears within
# 5min. Catches the bug class where the ISO builds + audits pass
# but the kernel/initrd combination fails to actually boot
# (firmware mismatch; missing module; broken init; etc.). The
# source-substrate audit (cascade #1) + ISO-content audit
# (cascade #4) catch static issues; this catches dynamic
# boot-time issues. Prior art: nixos/tests/installer.nix
# (per Kestrel 2026-05-26 ferry pointer).
#
# ubuntu-24.04 runners support nested KVM (/dev/kvm); helper
# falls back to TCG emulation when KVM unavailable.
#
# Security: this step uses no `github.event.*` interpolation in
# `run:` lines; all inputs are filesystem paths from prior steps
# of THIS workflow. The TS helper at tools/ci/qemu-boot-test.ts
# takes the ISO path as a positional CLI arg (no shell expansion
# of attacker-controllable strings).
- name: Install QEMU (apt)
# Installs only the x86 system emulator; --no-install-recommends skips
# apt's recommended extras.
run: sudo apt-get update -y && sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends qemu-system-x86

- name: QEMU boot smoke-test (cascade #5 — dynamic boot floor)
working-directory: full-ai-cluster
run: |
# Abort on the first failing command, unset variable, or failed pipeline stage.
set -euo pipefail
# Collect installer ISOs directly under result/iso (either naming scheme),
# sorted so that the choice below is deterministic.
mapfile -t iso_candidates < <(find result/iso -maxdepth 1 -type f \( -name 'zeta-installer-*.iso' -o -name 'nixos-minimal-*.iso' \) | sort)
# No matching ISO: emit a GitHub Actions error annotation and fail the step.
if [ "${#iso_candidates[@]}" -eq 0 ]; then
echo "::error::QEMU boot test: No installer ISO under result/iso/" >&2
exit 1
fi
# Use the first candidate in sort order, resolved to an absolute path.
iso_abs=$(readlink -f "${iso_candidates[0]}")
echo "Booting ISO: $iso_abs"
# The helper lives one level above the working directory; the ISO path is
# its only positional argument.
bun ../tools/ci/qemu-boot-test.ts "$iso_abs"

- name: Locate ISO + capture metadata
id: iso
working-directory: full-ai-cluster
Expand Down
185 changes: 185 additions & 0 deletions tools/ci/qemu-boot-test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
#!/usr/bin/env bun
/**
* tools/ci/qemu-boot-test.ts
*
* QEMU boot smoke-test for the canonical Zeta installer ISO.
*
* Boots the ISO in QEMU/KVM with serial console output captured to a
* log file, waits up to TIMEOUT_SECONDS for the expected login prompt
* matching the installer's networking.hostName (`zeta-installer`), then
* shuts down cleanly.
*
* Per Rule 0 (TS-over-bash for DST + cross-platform) + Kestrel's
* 2026-05-26 ferry pointer to nixos/tests/installer.nix prior art.
* Composes with full-ai-cluster/usb-nixos-installer/ canonical
* installer + the build-ai-cluster-iso.yml workflow's post-build audit
* stack.
*
* Usage:
* bun tools/ci/qemu-boot-test.ts <iso-path>
*
* Exit codes:
* 0 — boot succeeded (login prompt observed within timeout)
* 1 — boot failed (timeout or QEMU error)
* 2 — usage error (bad args or missing dependencies)
*
* GitHub Actions context: ubuntu-24.04 runners have /dev/kvm available
* for nested KVM acceleration. Install qemu-system-x86 before
* invocation (the test boots via legacy BIOS, so no OVMF/UEFI firmware
* package is needed). Tested boot time ~60-180s on cold-boot KVM.
*/

import { spawn } from "node:child_process";
import {
  accessSync,
  constants as fsConstants,
  existsSync,
  mkdtempSync,
  readFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";

// Hostname of the installer image (the file header ties it to the
// installer's networking.hostName).
const EXPECTED_HOSTNAME = "zeta-installer";
// Exact text whose appearance in the serial log counts as a successful boot.
const EXPECTED_LOGIN_PROMPT = EXPECTED_HOSTNAME + " login:";
// Upper bound on the wait for the prompt: 5 minutes — generous; typical
// boot is 60-180s.
const TIMEOUT_SECONDS = 5 * 60;
// Pause between successive reads of the serial log.
const POLL_INTERVAL_MS = 1000;
// Guest RAM in MiB: installer needs >= 1GB; 2GB gives headroom for nix.
const MEMORY_MB = 2 * 1024;
// Device node consulted to choose between KVM acceleration and TCG emulation.
const KVM_PATH = "/dev/kvm";

/**
 * Outcome of one boot attempt, as reported by the script.
 *
 * `exitCode` doubles as the process exit status documented in the file
 * header: 0 = boot succeeded, 1 = boot failed, 2 = usage error.
 */
interface BootResult {
  /** Process exit status to report for this outcome. */
  exitCode: 0 | 1 | 2;
  /** One-line, human-readable explanation of the outcome. */
  reason: string;
  /** Trailing slice of the serial console log, when one is available. */
  serialLogTail?: string;
}

/**
 * Prints the command-line synopsis to stderr and terminates the process
 * with exit status 2 (usage error). Never returns.
 */
function usage(): never {
  const synopsis = "usage: bun tools/ci/qemu-boot-test.ts <iso-path>";
  console.error(synopsis);
  return process.exit(2);
}

/**
 * Verifies that the QEMU binary this script launches can be executed.
 *
 * Runs `qemu-system-x86_64 --version` synchronously via Bun.
 *
 * @returns `null` when the probe exits with status 0; otherwise a
 *   human-readable message that includes the apt install hint.
 */
function checkDependencies(): string | null {
  try {
    const { exitCode } = Bun.spawnSync(["qemu-system-x86_64", "--version"]);
    return exitCode === 0
      ? null
      : "qemu-system-x86_64 not found or non-zero exit; install via `apt-get install -y qemu-system-x86`";
  } catch {
    // The probe itself threw — presumably the binary could not be located.
    return "qemu-system-x86_64 not found in PATH; install via `apt-get install -y qemu-system-x86`";
  }
}

/**
 * Assembles the qemu-system-x86_64 argument vector for a headless boot of
 * the installer ISO.
 *
 * @param isoPath - ISO image attached as the CD-ROM and used as boot device.
 * @param serialLogPath - File that receives the guest's serial console output.
 * @returns Arguments to pass to qemu-system-x86_64 (the program name itself
 *   is not included).
 */
function buildQemuArgs(isoPath: string, serialLogPath: string): string[] {
  const args: string[] = [
    "-machine", "q35",
    "-m", String(MEMORY_MB),
    "-smp", "2",
    "-cdrom", isoPath,
    "-boot", "d",
    "-serial", `file:${serialLogPath}`,
    "-display", "none",
    "-no-reboot",
    // BIOS instead of UEFI — simpler boot path; ISO supports both but
    // BIOS requires no extra firmware package.
  ];

  // KVM needs the device node to be openable read/write by the current
  // user, not merely present. Requesting -enable-kvm on a node we cannot
  // open makes QEMU abort at startup, so probe access rather than existence
  // and fall back to TCG in every "cannot use KVM" case.
  let kvmUsable = false;
  try {
    accessSync(KVM_PATH, fsConstants.R_OK | fsConstants.W_OK);
    kvmUsable = true;
  } catch {
    // Missing node or insufficient permissions — handled below.
  }

  if (kvmUsable) {
    args.push("-enable-kvm", "-cpu", "host");
  } else {
    args.push("-cpu", "qemu64");
    if (existsSync(KVM_PATH)) {
      console.warn(
        `[qemu-boot-test] ${KVM_PATH} exists but is not readable/writable by this user; using TCG emulation (will be slow)`,
      );
    } else {
      console.warn(`[qemu-boot-test] ${KVM_PATH} not available; using TCG emulation (will be slow)`);
    }
  }

  return args;
}

/**
 * Polls the serial console log until the expected login prompt shows up or
 * the timeout elapses.
 *
 * The log is always inspected before a timeout is declared, so a prompt
 * written during the final sleep interval still counts as success.
 *
 * @param serialLogPath - File QEMU writes the guest's serial output to.
 * @returns exitCode 0 with the last 500 characters of the log once the
 *   prompt is seen; exitCode 1 with the last 2000 characters (or a
 *   placeholder when the log is missing or empty) on timeout.
 */
async function waitForLoginPrompt(serialLogPath: string): Promise<BootResult> {
  const deadline = Date.now() + TIMEOUT_SECONDS * 1000;

  // A missing or momentarily unreadable log is not an error while polling;
  // it is simply retried on the next iteration.
  const readLog = (): string | null => {
    try {
      return readFileSync(serialLogPath, "utf8");
    } catch {
      return null;
    }
  };

  for (;;) {
    const log = readLog();
    if (log !== null && log.includes(EXPECTED_LOGIN_PROMPT)) {
      return {
        exitCode: 0,
        reason: `Login prompt observed: "${EXPECTED_LOGIN_PROMPT}"`,
        serialLogTail: log.slice(-500),
      };
    }

    // Deadline is evaluated after the read so the final poll is never skipped.
    if (Date.now() >= deadline) {
      const hasOutput = log !== null && log.length > 0;
      return {
        exitCode: 1,
        reason: `Timeout (${TIMEOUT_SECONDS}s) waiting for "${EXPECTED_LOGIN_PROMPT}"`,
        serialLogTail: hasOutput ? log.slice(-2000) : "(serial log empty or never created)",
      };
    }

    await Bun.sleep(POLL_INTERVAL_MS);
  }
}

/**
 * Builds the result to report when QEMU goes away before the login-prompt
 * poll has finished. The serial log is read one last time so that a prompt
 * written just before QEMU exited still counts as success.
 *
 * @param serialLogPath - File QEMU wrote the guest's serial output to.
 * @param how - Short phrase describing how QEMU went away (used in the reason).
 */
function resultAfterQemuExit(serialLogPath: string, how: string) {
  let log = "";
  try {
    log = readFileSync(serialLogPath, "utf8");
  } catch {
    // Log could not be read (e.g. it was never created); treat as empty.
  }

  if (log.includes(EXPECTED_LOGIN_PROMPT)) {
    return {
      exitCode: 0 as const,
      reason: `Login prompt observed: "${EXPECTED_LOGIN_PROMPT}"`,
      serialLogTail: log.slice(-500),
    };
  }
  return {
    exitCode: 1 as const,
    reason: `QEMU ${how} before "${EXPECTED_LOGIN_PROMPT}" appeared`,
    serialLogTail: log.length > 0 ? log.slice(-2000) : "(serial log empty or never created)",
  };
}

/**
 * Stops a running QEMU: sends SIGTERM, escalates to SIGKILL if the process
 * is still alive after a grace period, and returns once QEMU is gone or a
 * short post-SIGKILL wait has elapsed. Always returns in bounded time.
 *
 * @param qemu - Child process handle returned by `spawn`.
 * @param qemuGone - Promise that settles when the child has exited.
 */
async function stopQemu(qemu: ReturnType<typeof spawn>, qemuGone: Promise<unknown>): Promise<void> {
  const sigtermGraceMs = 5000;
  const sigkillReapMs = 2000;
  const timeoutAfter = (ms: number) =>
    new Promise<"timeout">((resolve) => setTimeout(() => resolve("timeout"), ms));
  const gone = qemuGone.then(() => "gone" as const);

  console.log(`[qemu-boot-test] Killing QEMU (PID ${qemu.pid})`);
  qemu.kill("SIGTERM");
  if ((await Promise.race([gone, timeoutAfter(sigtermGraceMs)])) === "gone") return;

  console.warn(`[qemu-boot-test] QEMU still running ${sigtermGraceMs}ms after SIGTERM; sending SIGKILL`);
  qemu.kill("SIGKILL");
  await Promise.race([gone, timeoutAfter(sigkillReapMs)]);
}

/**
 * Script entry point: validates the ISO argument and dependencies, boots
 * the ISO in QEMU with the serial console captured to a temp file, waits
 * for the login prompt, stops QEMU, prints a summary and exits.
 *
 * Exits with 0 when the prompt was observed, 1 on timeout or when QEMU
 * went away first, 2 on a missing/invalid argument or missing dependency.
 * Never returns normally.
 */
async function main(): Promise<never> {
  const [isoPath] = process.argv.slice(2);
  if (!isoPath) usage();

  if (!existsSync(isoPath)) {
    console.error(`[qemu-boot-test] ISO not found: ${isoPath}`);
    process.exit(2);
  }

  const depErr = checkDependencies();
  if (depErr) {
    console.error(`[qemu-boot-test] ${depErr}`);
    process.exit(2);
  }

  // The temp directory is intentionally left behind so the full serial log
  // (path printed below) can be inspected after the run.
  const tmpDir = mkdtempSync(join(tmpdir(), "zeta-qemu-boot-test-"));
  const serialLogPath = join(tmpDir, "serial.log");

  console.log(`[qemu-boot-test] ISO: ${isoPath}`);
  console.log(`[qemu-boot-test] Serial log: ${serialLogPath}`);
  console.log(`[qemu-boot-test] Memory: ${MEMORY_MB}MB; timeout: ${TIMEOUT_SECONDS}s`);
  console.log(`[qemu-boot-test] Expecting login prompt: "${EXPECTED_LOGIN_PROMPT}"`);

  const qemuArgs = buildQemuArgs(isoPath, serialLogPath);
  console.log(`[qemu-boot-test] Launching: qemu-system-x86_64 ${qemuArgs.join(" ")}`);

  const qemu = spawn("qemu-system-x86_64", qemuArgs, {
    stdio: ["ignore", "inherit", "inherit"],
  });

  let qemuExited = false;
  // Settles once QEMU is gone: normal exit, death by signal, or failed spawn.
  const qemuGone = new Promise<string>((resolve) => {
    qemu.on("exit", (code, signal) => {
      qemuExited = true;
      console.log(`[qemu-boot-test] QEMU exited with code ${code}`);
      resolve(signal ? `was terminated by ${signal}` : `exited with code ${code}`);
    });
    // Without a listener an 'error' event would be thrown as an uncaught
    // exception. Only a failed spawn (no PID assigned) means QEMU is gone;
    // other errors (e.g. a failed kill) leave the process running.
    qemu.on("error", (err) => {
      console.error(`[qemu-boot-test] QEMU process error: ${err.message}`);
      if (qemu.pid === undefined) {
        qemuExited = true;
        resolve(`failed to start (${err.message})`);
      }
    });
  });

  // Fail fast: if QEMU dies before the prompt appears there is nothing left
  // to wait for, so do not sit out the remainder of the timeout.
  const result = await Promise.race([
    waitForLoginPrompt(serialLogPath),
    qemuGone.then((how) => resultAfterQemuExit(serialLogPath, how)),
  ]);

  // Wait for QEMU to actually terminate before exiting; exiting right after
  // SIGTERM would cancel the SIGKILL escalation and could orphan the VM.
  if (!qemuExited) {
    await stopQemu(qemu, qemuGone);
  }

  console.log("");
  console.log("=== Result ===");
  console.log(`Exit code: ${result.exitCode}`);
  console.log(`Reason: ${result.reason}`);
  if (result.serialLogTail) {
    console.log("");
    console.log("=== Serial log tail ===");
    console.log(result.serialLogTail);
  }

  process.exit(result.exitCode);
}

// Run the script. Any unexpected rejection (e.g. a filesystem error while
// setting up the temp directory) is reported explicitly and mapped to exit
// code 1 instead of being left as an unhandled promise rejection.
main().catch((err: unknown) => {
  const detail = err instanceof Error ? (err.stack ?? err.message) : String(err);
  console.error(`[qemu-boot-test] Unexpected error: ${detail}`);
  process.exit(1);
});
Loading