Skip to content
233 changes: 233 additions & 0 deletions tools/ci/docker-nixos-install-sh-test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
#!/usr/bin/env bun
/**
* tools/ci/docker-nixos-install-sh-test.ts
*
* B-0849 Phase 1 — TS wrapper for the Docker NixOS install.sh test
* harness. Per .claude/rules/rule-0-no-sh-files.md: TS-over-bash for
* DST + cross-platform. Wraps `docker build` of
* tools/ci/dockerfiles/nixos-install-sh-test/Dockerfile with:
*
* - exit-code mapping (build success = 0; build failure = 1)
* - log capture (saved to workspace-relative path for CI artifact)
* - timeout enforcement (default 600s — install.sh + mise + bun
* + claude-code download can take a while on cold cache)
* - build-context discipline (uses repo root as context;
* dockerfile is at the fixed path; doesn't pollute root with
* build artifacts)
*
* Composes with B-0831 cascade #6 QEMU full-install test
* (qemu-full-install-test.ts): Docker = fast iteration (~30-60 sec);
* QEMU = end-to-end virtualized boot (~15 min). Both run on CI for
* install-substrate PRs.
*
* Operator framing 2026-05-27 (Aaron): "we should add docker based
* nixos install.sh testing so we can iterate quick that's an easy
* dockerfile" → B-0849 backlog row → this implementation.
*
* Usage:
* bun tools/ci/docker-nixos-install-sh-test.ts [--keep-image]
*
* Flags:
* --keep-image Don't `docker rmi` after the test (default: cleanup)
*
* Env:
* DOCKER_BUILD_TIMEOUT_SEC Override timeout (default 600)
* DOCKER_LOG_OUT_PATH Override log path (default .docker-test-log,
* resolved against the current working directory)
*
* Exit codes:
* 0 — Docker build succeeded (install.sh + mise + bun + claude-code
* all validated on NixOS userspace)
* 1 — Docker build failed (one of the validation steps in the
* Dockerfile failed; see log)
* 2 — Usage error / missing prerequisites (docker not installed,
* wrong working directory, etc.)
* 124 — Timeout (build exceeded DOCKER_BUILD_TIMEOUT_SEC)
*/

import { spawnSync } from "node:child_process";
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
import { resolve } from "node:path";

// ---- Build configuration ----
/** Repo-relative path of the Dockerfile passed to `docker build --file`. */
const DOCKERFILE_PATH =
  "tools/ci/dockerfiles/nixos-install-sh-test/Dockerfile";

/** Tag applied to the image produced by the build. */
const IMAGE_TAG = "zeta-nixos-install-sh-test:local";

/** Build timeout in seconds used when DOCKER_BUILD_TIMEOUT_SEC is unset. */
const DEFAULT_TIMEOUT_SEC = 10 * 60;

/** Log file path used when DOCKER_LOG_OUT_PATH is unset. */
const DEFAULT_LOG_PATH = ".docker-test-log";

// ---- Result type ----
/** Process exit codes this wrapper can terminate with. */
type BuildExitCode = 0 | 1 | 2 | 124;

/** Outcome of one `docker build` attempt. */
interface BuildResult {
  /** Exit code the process should terminate with. */
  exitCode: BuildExitCode;
  /** One-line, human-readable summary of the outcome. */
  reason: string;
  /** Trailing lines of the captured build output, when available. */
  logTail?: string;
}

/**
 * Print the command-line synopsis and the supported environment
 * variables to stderr, then terminate the process with exit code 2.
 */
function usage(): never {
  const helpLines = [
    "usage: bun tools/ci/docker-nixos-install-sh-test.ts [--keep-image]",
    "",
    "env:",
    " DOCKER_BUILD_TIMEOUT_SEC override timeout (default 600)",
    " DOCKER_LOG_OUT_PATH override log path",
  ];
  for (const line of helpLines) {
    console.error(line);
  }
  process.exit(2);
}

/**
 * Verify the preconditions for running the build, exiting the process
 * with code 2 (after printing an explanation to stderr) if any is unmet:
 *
 *  1. `docker --version` runs and exits 0;
 *  2. the Dockerfile exists at its repo-relative path, i.e. the current
 *     working directory is the repo root;
 *  3. `.mise.toml` exists in the current working directory (the
 *     Dockerfile COPYs it).
 */
function checkPrereqs(): void {
  // Verify docker is installed. When the binary cannot be spawned at all,
  // `status` is null and `error` carries the cause.
  const docker = spawnSync("docker", ["--version"], { encoding: "utf8" });
  if (docker.status !== 0) {
    console.error("error: docker not installed or not on PATH");
    console.error(" install via the standard mechanism for your OS");
    if (docker.error) {
      console.error(` (${docker.error.message})`);
    }
    process.exit(2);
  }

  // Verify we're at repo root (Dockerfile path is repo-relative)
  if (!existsSync(DOCKERFILE_PATH)) {
    console.error(`error: ${DOCKERFILE_PATH} not found`);
    console.error(
      " run from repo root: bun tools/ci/docker-nixos-install-sh-test.ts"
    );
    process.exit(2);
  }

  // Verify .mise.toml is at repo root (Dockerfile COPYs it)
  if (!existsSync(".mise.toml")) {
    console.error("error: .mise.toml not found at repo root");
    process.exit(2);
  }
}

/**
 * Upper bound, in bytes, on the docker output captured per stream.
 * Node's spawnSync default is 1 MiB; `--progress=plain` build logs can
 * exceed that, in which case the child is killed and the run would be
 * indistinguishable from a timeout.
 */
const BUILD_OUTPUT_MAX_BYTES = 256 * 1024 * 1024;

/** Number of trailing log lines returned to the caller. */
const LOG_TAIL_LINES = 20;

/** The parts of a spawnSync result that decide the build outcome. */
interface BuildSpawnOutcome {
  status: number | null;
  signal: string | null;
  errorCode?: string;
  errorMessage?: string;
}

/**
 * Map a finished (or killed) `docker build` process to a BuildResult.
 * Pure: performs no I/O.
 *
 * @param outcome     exit status, terminating signal and spawn error info
 * @param timeoutSec  the timeout that was enforced, for the message
 * @param elapsedSec  wall-clock seconds the build ran
 * @param logTail     trailing log lines to attach to the result
 * @returns exit code 124 for a timeout, 0 for success, 1 otherwise
 */
function classifyBuildOutcome(
  outcome: BuildSpawnOutcome,
  timeoutSec: number,
  elapsedSec: number,
  logTail: string
): BuildResult {
  // Checked before the timeout branch: an output-buffer overflow also
  // kills the child with the kill signal, but it is not a timeout.
  if (outcome.errorCode === "ENOBUFS") {
    return {
      exitCode: 1,
      reason: `docker build output exceeded the ${BUILD_OUTPUT_MAX_BYTES}-byte capture limit after ${elapsedSec}s (log is truncated)`,
      logTail,
    };
  }

  if (outcome.signal === "SIGTERM" || outcome.errorCode === "ETIMEDOUT") {
    return {
      exitCode: 124,
      reason: `docker build timed out after ${timeoutSec}s (actual: ${elapsedSec}s)`,
      logTail,
    };
  }

  if (outcome.status === 0) {
    return {
      exitCode: 0,
      reason: `docker build succeeded in ${elapsedSec}s`,
      logTail,
    };
  }

  // No exit status at all means the process never ran to completion
  // (e.g. it could not be spawned); report the cause instead of "exit null".
  if (outcome.status === null && outcome.errorMessage !== undefined) {
    return {
      exitCode: 1,
      reason: `docker build could not be run: ${outcome.errorMessage}`,
      logTail,
    };
  }

  return {
    exitCode: 1,
    reason: `docker build failed (exit ${outcome.status}) after ${elapsedSec}s`,
    logTail,
  };
}

/**
 * Run `docker build` for the harness Dockerfile with the repo root as
 * build context, write the captured output to `logPath`, and classify
 * the outcome.
 *
 * @param timeoutSec  maximum build duration in seconds
 * @param logPath     file the captured output (stdout followed by
 *                    stderr, not interleaved) is written to
 * @returns the classified result, including the last lines of the log
 */
function runBuild(timeoutSec: number, logPath: string): BuildResult {
  const startMs = Date.now();
  const buildArgs = [
    "build",
    "--file",
    DOCKERFILE_PATH,
    "--tag",
    IMAGE_TAG,
    // --progress=plain prints full output (vs --progress=auto which
    // collapses for terminals); we want full output captured to log
    "--progress=plain",
    // Build context = current dir (repo root)
    ".",
  ];

  console.log(`[B-0849 Phase 1] docker build ${buildArgs.join(" ")}`);
  console.log(`[B-0849 Phase 1] timeout: ${timeoutSec}s; log: ${logPath}`);

  // spawnSync takes its timeout in milliseconds.
  const result = spawnSync("docker", buildArgs, {
    encoding: "utf8",
    timeout: timeoutSec * 1000,
    maxBuffer: BUILD_OUTPUT_MAX_BYTES,
    // Capture both output streams; stdin is not needed.
    stdio: ["ignore", "pipe", "pipe"],
  });

  const elapsedSec = Math.floor((Date.now() - startMs) / 1000);

  // Capture full output to log file (stdout first, then stderr).
  const fullLog = (result.stdout ?? "") + (result.stderr ?? "");
  writeFileSync(logPath, fullLog, "utf8");

  // Trim trailing whitespace first so a final newline does not consume
  // one of the tail lines with an empty string.
  const logTail = fullLog
    .trimEnd()
    .split("\n")
    .slice(-LOG_TAIL_LINES)
    .join("\n");

  // System errors carry a string `code`; plain `Error` does not declare it.
  const spawnError = result.error as (Error & { code?: string }) | undefined;

  const outcome = classifyBuildOutcome(
    {
      status: result.status,
      signal: result.signal,
      errorCode: spawnError?.code,
      errorMessage: spawnError?.message,
    },
    timeoutSec,
    elapsedSec,
    logTail
  );

  if (outcome.exitCode === 0) {
    console.log(
      `[B-0849 Phase 1] SUCCESS — docker build completed in ${elapsedSec}s`
    );
  }
  return outcome;
}

/**
 * Remove the test image unless the caller asked to keep it.
 *
 * Best-effort: a failing `docker rmi` is reported but never changes the
 * process exit code. A missing image (for example after a failed build,
 * when nothing was tagged) is reported as "nothing to clean up" rather
 * than as a warning.
 *
 * @param keepImage  when true, leave the image in place and only log that
 */
function cleanup(keepImage: boolean): void {
  if (keepImage) {
    console.log(
      `[B-0849 Phase 1] --keep-image set; image ${IMAGE_TAG} retained for inspection`
    );
    return;
  }

  const rm = spawnSync("docker", ["rmi", "-f", IMAGE_TAG], {
    encoding: "utf8",
  });
  if (rm.status === 0) {
    console.log(`[B-0849 Phase 1] cleaned up image ${IMAGE_TAG}`);
    return;
  }

  // Prefer the spawn error (docker could not be run) over docker's stderr.
  const detail = (rm.error?.message ?? rm.stderr ?? "").trim();
  if (/no such image/i.test(detail)) {
    console.log(`[B-0849 Phase 1] no image ${IMAGE_TAG} to clean up`);
    return;
  }

  const suffix = detail ? `: ${detail}` : "";
  console.error(
    `[B-0849 Phase 1] warning: docker rmi ${IMAGE_TAG} failed (non-fatal)${suffix}`
  );
}

/**
 * Parse a timeout override given in whole seconds.
 *
 * Unlike `parseInt`, this rejects values with trailing garbage or a
 * fractional part ("600abc", "1.5") instead of silently truncating them.
 *
 * @param raw       the raw environment value, or undefined when unset
 * @param fallback  value returned when `raw` is undefined
 * @returns the positive integer timeout, or undefined when `raw` is set
 *          but is not a positive decimal integer
 */
function parseTimeoutSec(
  raw: string | undefined,
  fallback: number
): number | undefined {
  if (raw === undefined) {
    return fallback;
  }
  const digits = raw.trim();
  if (!/^\d+$/.test(digits)) {
    return undefined;
  }
  const seconds = Number(digits);
  return Number.isSafeInteger(seconds) && seconds > 0 ? seconds : undefined;
}

/**
 * Script entry point: parse flags and environment overrides, check
 * prerequisites, run the build, print the result (plus the log tail on
 * failure), clean up the image, and exit with the build's exit code.
 *
 * Exits with code 2 on an unknown flag, on `--help`/`-h`, or on an
 * invalid DOCKER_BUILD_TIMEOUT_SEC.
 */
function main(): void {
  // Parse args
  let keepImage = false;
  for (const arg of process.argv.slice(2)) {
    if (arg === "--keep-image") {
      keepImage = true;
    } else if (arg === "--help" || arg === "-h") {
      usage();
    } else {
      console.error(`error: unknown arg: ${arg}`);
      usage();
    }
  }

  // Resolve env overrides
  const timeoutSec = parseTimeoutSec(
    process.env.DOCKER_BUILD_TIMEOUT_SEC,
    DEFAULT_TIMEOUT_SEC
  );
  if (timeoutSec === undefined) {
    console.error(
      `error: DOCKER_BUILD_TIMEOUT_SEC must be a positive integer (got: ${process.env.DOCKER_BUILD_TIMEOUT_SEC})`
    );
    process.exit(2);
  }
  const logPath = resolve(process.env.DOCKER_LOG_OUT_PATH ?? DEFAULT_LOG_PATH);

  // Ensure the log directory exists. Resolving ".." against the absolute
  // log path yields its parent with the platform's own separator handling;
  // `recursive: true` makes this a no-op when the directory already exists.
  mkdirSync(resolve(logPath, ".."), { recursive: true });

  checkPrereqs();

  const result = runBuild(timeoutSec, logPath);

  console.log("");
  console.log(`[B-0849 Phase 1] result: ${result.reason}`);
  console.log(`[B-0849 Phase 1] log: ${logPath}`);

  if (result.exitCode !== 0) {
    console.log("[B-0849 Phase 1] tail of build log:");
    console.log("--- BEGIN TAIL ---");
    console.log(result.logTail);
    console.log("--- END TAIL ---");
  }

  cleanup(keepImage);

  process.exit(result.exitCode);
}

// Script entry point: run the harness when this file is executed.
main();
85 changes: 85 additions & 0 deletions tools/ci/dockerfiles/nixos-install-sh-test/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# tools/ci/dockerfiles/nixos-install-sh-test/Dockerfile
#
# B-0849 Phase 1 — fast-iteration test harness for tools/setup/install.sh
# on NixOS userspace. Validates that linux.sh's /etc/NIXOS detection
# branch (added in iter-5.5.1 PR #5389) correctly routes to common/
# mise.sh + bootstrap installs cleanly.
#
# Cycle time: ~30-60 sec (vs B-0831 QEMU full-install ~15 min vs
# physical USB install ~30+ min). Complementary to B-0831 cascade #6.
#
# Operator framing 2026-05-27: "we should add docker based nixos
# install.sh testing so we can iterate quick that's an easy
# dockerfile" + "nixos*" (correcting earlier typo).
#
# Why nixos/nix base: provides nix-the-package-manager + nix-shell +
# /nix/store userspace. NOT a full NixOS init system (no systemd in
# the container), so this validates install.sh's userspace behavior
# only — full-system tests still belong in B-0831 QEMU cascade.
#
# Required runtime arg: none. Repo gets COPY'd at build time. The
# install.sh runs as root in the container (CI usage pattern) — the
# linux.sh script handles root-vs-sudo via `id -u` check, so this
# matches CI runner contexts.
#
# Optional build arg: NIX_IMAGE_TAG (default: latest). Pass
# `--build-arg NIX_IMAGE_TAG=<tag>` to pin the nixos/nix base image to a
# specific release for reproducible runs.

ARG NIX_IMAGE_TAG=latest
FROM nixos/nix:${NIX_IMAGE_TAG}

# /etc/NIXOS marker file — what linux.sh's NixOS detection branch uses
# to skip the apt step (per iter-5.5.1 PR #5389). Without this marker
# the test would not exercise the NixOS-specific path; install.sh
# would bail with "this script currently supports Debian/Ubuntu only".
RUN touch /etc/NIXOS

# Enable nix flakes (needed for tools/setup/common/mise.sh and other
# substrate that uses flake-style invocation). Append (>>) rather than
# overwrite so any settings the base image already ships in nix.conf
# are preserved.
RUN mkdir -p /etc/nix && \
    echo "experimental-features = nix-command flakes" >> /etc/nix/nix.conf

# Workspace = repo root mounted at build time
WORKDIR /workspace

# COPY only the install-relevant subset (substrate-honest: don't COPY
# .git or unrelated tooling that would invalidate cache on unrelated
# changes). The install.sh + linux.sh + macos.sh + common/ +
# manifests/ + .mise.toml are all that's needed for this harness.
COPY tools/setup /workspace/tools/setup
COPY .mise.toml /workspace/.mise.toml
# package.json + bun.lock pin TS-runtime deps if install.sh references
# them (e.g., bun --version checks); copy to mirror dev environment
COPY package.json bun.lock* /workspace/

# Run install.sh — this exercises:
#   1. install.sh dispatch (detects Linux → linux.sh)
#   2. linux.sh /etc/NIXOS detection → skip apt step
#   3. mise.sh installation
#   4. common/mise.sh installs .mise.toml runtimes (bun, node, python,
#      java, uv, actionlint, shellcheck, etc.)
#   5. downstream common/* steps (python-tools, elan, dotnet-tools,
#      verifiers, shellenv)
# Exits non-zero if any step fails — the docker build itself fails,
# which CI surfaces as a build failure.
RUN tools/setup/install.sh

# Validation step 1: bun installed via mise and reporting a 1.x version.
# Only the major version is checked here (the .mise.toml pin is
# currently bun = "1.3"), so minor/patch bumps of the pin do not break
# this harness.
RUN bash -lc 'eval "$(mise activate bash)" && bun --version | grep -q "^1\." || (echo "ERROR: bun not installed or wrong version" && exit 1)'

# Validation step 2: claude-code installable via bun (validates the
# iter-5.5.0 substrate's bun install --global pattern works on NixOS
# userspace). Doesn't actually run claude-code (would need API key);
# just verifies the install succeeds + the binary lands at the
# expected path. `set -o pipefail` makes a failing `bun install`
# fail the step even though its output is piped through `tail`.
RUN bash -lc 'set -o pipefail && eval "$(mise activate bash)" && \
    mkdir -p /root/.bun/bin && \
    BUN_INSTALL=/root/.bun bun install --global @anthropic-ai/claude-code 2>&1 | tail -5 && \
    test -x /root/.bun/bin/claude || (echo "ERROR: claude install failed or binary not at expected path" && exit 1)'

# Validation step 3: gh installed via mise / nix (the iter-5.5.0 Bug 5
# fix added gh to common.nix systemPackages; in Docker we don't have
# the NixOS module evaluating, so we install via nix-shell as a proxy
# check).
RUN nix-shell -p gh --run 'gh --version | head -1'

# Final marker — if all steps succeed, this echo lands in the build
# output as the success signal for CI.
RUN echo "B-0849 Phase 1 Docker harness validation COMPLETE — install.sh + mise + bun + claude-code all working on NixOS userspace"
Loading