diff --git a/.github/workflows/gate.yml b/.github/workflows/gate.yml
index 521f2919..31b1772f 100644
--- a/.github/workflows/gate.yml
+++ b/.github/workflows/gate.yml
@@ -14,7 +14,10 @@
 #   nightly schedule (in practice every merge, since direct
 #   pushes are blocked by branch protection).
 #   Lint jobs pinned to ubuntu-24.04 (short-lived, OS-independent
-#   work). Windows legs deferred to peer-harness milestone.
+#   work; bumped from 22.04 on 2026-04-28 per the human
+#   maintainer's input + version-currency discipline —
+#   ubuntu-latest = 24.04 since Jan 2025).
+#   Windows legs deferred to peer-harness milestone.
 # - Third-party actions SHA-pinned by full 40-char commit SHA;
 #   trailing `# vX.Y.Z` comments for humans.
 # - permissions: contents: read at the workflow level; no job
@@ -313,16 +316,72 @@ jobs:
       - name: Checkout
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
 
+      - name: Cache install.sh outputs (mise runtimes + dotnet tools + verifier jars)
+        # Comprehensive cache of everything `tools/setup/install.sh`
+        # writes — keeps dev laptops and CI runners as close to the
+        # same state as possible per the human maintainer's
+        # 2026-04-28 input: dev setup and build-machine setup
+        # should be as close to the same as possible; cache the
+        # whole install/setup output rather than per-component.
+        # Without comprehensive caching every CI run hits CDNs cold
+        # (mise.run + GitHub releases for bun/shellcheck/actionlint +
+        # NuGet + Lean toolchain), so a transient 502 can fail any
+        # run instead of only the first run after a key change.
+        # Cache key hashes BOTH .mise.toml (runtime versions) AND
+        # tools/setup/** (install logic itself) so changes to either
+        # invalidate cache → vanilla install path gets re-tested
+        # whenever the install discipline changes.
+        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
+        with:
+          path: |
+            ~/.local/bin/mise
+            ~/.local/share/mise
+            ~/.cache/mise
+            ~/.dotnet/tools
+            ~/.elan
+            ~/.config/zeta
+          key: install-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('.mise.toml', 'tools/setup/**', 'global.json') }}
+
       - name: Install toolchain via three-way-parity script (GOVERNANCE §24)
         # Installs shellcheck via mise (pinned in .mise.toml). Single
         # source of truth — the same version on dev laptops + CI
         # runners. Prior step relied on shellcheck shipping pre-
-        # installed on ubuntu-22.04 (the older runner image), which
-        # broke parity (dev machines may have a different version)
-        # and wouldn't survive newer runner images like ubuntu-slim
-        # that don't ship shellcheck. Same parity concern applies on
-        # ubuntu-24.04 — install via mise regardless.
-        run: ./tools/setup/install.sh
+        # installed on ubuntu (any version), which broke parity (dev machines
+        # may have a different version) and wouldn't survive newer
+        # runner images like ubuntu-slim that don't ship shellcheck.
+        # Note: install.sh is idempotent (detect-first-install-else-update);
+        # safe to run on every CI invocation including cache-hit (it
+        # short-circuits when tools are already present).
+        # CI-level retry per the human maintainer's 2026-04-28
+        # input: PR #23 (on AceHack) failed because mise's
+        # internal 3-attempt retry exhausted on a github releases
+        # CDN 502 for bun-1.3.13. CI runs are non-interactive so
+        # retry is safe; dev runs stay interactive (user decides).
+        # 5 attempts; backoff 10s/30s/60s/120s covers short-burst
+        # through multi-minute CDN outages. The retry wrapper is
+        # currently inline-duplicated across multiple jobs;
+        # extracting to a composite action is a follow-up
+        # improvement candidate.
+        run: |
+          set -euo pipefail
+          for attempt in 1 2 3 4 5; do
+            if ./tools/setup/install.sh; then exit 0; fi
+            if [ "$attempt" = "5" ]; then echo "install.sh failed after 5 attempts" >&2; exit 1; fi
+            # Backoff: 10s, 30s, 60s, 120s — covers transient CDN
+            # blips from short-burst-and-recover up to multi-minute
+            # outages. Maintainer 2026-04-28 input: bumped from 3 to 5
+            # because PR #23's bun-1.3.13 502 burned all 3 prior
+            # mise-internal retries and a 3-attempt wrapper on top
+            # didn't add enough margin.
+            case "$attempt" in
+              1) backoff=10 ;;
+              2) backoff=30 ;;
+              3) backoff=60 ;;
+              4) backoff=120 ;;
+            esac
+            echo "install.sh attempt $attempt failed; retrying in ${backoff}s..." >&2
+            sleep "$backoff"
+          done
 
       - name: Run shellcheck
         # Scope: Zeta's own scripts under `tools/setup/` only —
@@ -360,13 +419,44 @@ jobs:
       - name: Checkout
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
 
+      - name: Cache install.sh outputs (mise runtimes + dotnet tools + verifier jars)
+        # Comprehensive cache — see lint-shell job above for the
+        # rationale (the human maintainer's 2026-04-28 dev-CI parity input +
+        # transient 502 prevention). Same cache key shape so all
+        # lint jobs share the cache when toolchain unchanged.
+        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
+        with:
+          path: |
+            ~/.local/bin/mise
+            ~/.local/share/mise
+            ~/.cache/mise
+            ~/.dotnet/tools
+            ~/.elan
+            ~/.config/zeta
+          key: install-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('.mise.toml', 'tools/setup/**', 'global.json') }}
+
       - name: Install toolchain via three-way-parity script (GOVERNANCE §24)
-        # Installs actionlint via mise (pinned in .mise.toml). Single
-        # source of truth — the same version flows to dev laptops and
-        # CI. Replaces a prior inline `Download actionlint (pinned)`
-        # step whose version was maintained separately from the
-        # declarative pin.
-        run: ./tools/setup/install.sh
+        # See lint-shell job above for retry rationale (maintainer input, 2026-04-28).
+        run: |
+          set -euo pipefail
+          for attempt in 1 2 3 4 5; do
+            if ./tools/setup/install.sh; then exit 0; fi
+            if [ "$attempt" = "5" ]; then echo "install.sh failed after 5 attempts" >&2; exit 1; fi
+            # Backoff: 10s, 30s, 60s, 120s — covers transient CDN
+            # blips from short-burst-and-recover up to multi-minute
+            # outages. Maintainer 2026-04-28 input: bumped from 3 to 5
+            # because PR #23's bun-1.3.13 502 burned all 3 prior
+            # mise-internal retries and a 3-attempt wrapper on top
+            # didn't add enough margin.
+            case "$attempt" in
+              1) backoff=10 ;;
+              2) backoff=30 ;;
+              3) backoff=60 ;;
+              4) backoff=120 ;;
+            esac
+            echo "install.sh attempt $attempt failed; retrying in ${backoff}s..." >&2
+            sleep "$backoff"
+          done
 
       - name: Run actionlint
         # -ignore 'unknown permission scope "administration"' is a
@@ -492,15 +582,46 @@ jobs:
       - name: Checkout
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
 
+      - name: Cache install.sh outputs (mise runtimes + dotnet tools + verifier jars)
+        # Comprehensive cache — see lint-shell job above for the
+        # rationale. PR #23 2026-04-28 root-cause was a transient
+        # 502 on bun-1.3.13 download from GitHub releases CDN
+        # (bun is what mise's npm: backend uses to run
+        # markdownlint-cli2); cache hit avoids the CDN entirely.
+        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
+        with:
+          path: |
+            ~/.local/bin/mise
+            ~/.local/share/mise
+            ~/.cache/mise
+            ~/.dotnet/tools
+            ~/.elan
+            ~/.config/zeta
+          key: install-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('.mise.toml', 'tools/setup/**', 'global.json') }}
+
       - name: Install toolchain via three-way-parity script (GOVERNANCE §24)
-        # Installs markdownlint-cli2 via mise (pinned in .mise.toml as
-        # `npm:markdownlint-cli2`). Single source of truth — same
-        # version on dev laptops + CI runners. Prior step hardcoded
-        # the version in this workflow (0.18.1) which drifted
-        # behind the pin discipline the rest of the lints use and
-        # violated the human maintainer's "update declaratively
-        # everywhere" ask (2026-04-24).
-        run: ./tools/setup/install.sh
+        # See lint-shell job above for retry rationale (maintainer input,
+        # 2026-04-28: PR #23 mise+bun-1.3.13 502).
+        run: |
+          set -euo pipefail
+          for attempt in 1 2 3 4 5; do
+            if ./tools/setup/install.sh; then exit 0; fi
+            if [ "$attempt" = "5" ]; then echo "install.sh failed after 5 attempts" >&2; exit 1; fi
+            # Backoff: 10s, 30s, 60s, 120s — covers transient CDN
+            # blips from short-burst-and-recover up to multi-minute
+            # outages. Maintainer 2026-04-28 input: bumped from 3 to 5
+            # because PR #23's bun-1.3.13 502 burned all 3 prior
+            # mise-internal retries and a 3-attempt wrapper on top
+            # didn't add enough margin.
+            case "$attempt" in
+              1) backoff=10 ;;
+              2) backoff=30 ;;
+              3) backoff=60 ;;
+              4) backoff=120 ;;
+            esac
+            echo "install.sh attempt $attempt failed; retrying in ${backoff}s..." >&2
+            sleep "$backoff"
+          done
 
       - name: Run markdownlint
         run: mise exec -- markdownlint-cli2 "**/*.md"