From 7964b325fa8b5966089f4d97501dac9caf802f84 Mon Sep 17 00:00:00 2001 From: Lior Date: Tue, 26 May 2026 22:01:43 -0400 Subject: [PATCH 1/2] =?UTF-8?q?fix(B-0832):=20nmtui=20auto-relaunch=20loop?= =?UTF-8?q?=20on=20no-internet=20=E2=80=94=20Esc=20out=20of=20nmtui=20re-l?= =?UTF-8?q?aunches=20(refresh-friendly=20UX)=20instead=20of=20breaking=20i?= =?UTF-8?q?nstall=20flow=20(operator=202026-05-26)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operator 2026-05-26 substrate-engineering ask during 3rd USB physical test: 'i want to be able to refresh the network withing [sic: without] breaking the script'. Old behavior: launch nmtui ONCE; if no internet on exit → drop_to_shell. That broke the install flow when operator hit Esc to refresh the wifi scan (empirical 2026-05-26 1st USB physical-test session — B-0832 nmtui WiFi rescan empirical anchor). New behavior: loop nmtui until either: - has_internet succeeds → continue install - operator presses 's' within 10s → drop to shell (escape hatch) Operator flow: - Esc out of nmtui without connecting → nmtui re-launches with fresh wifi scan - Connect successfully → script continues to zeta-install - Need shell access → press 's' at the post-attempt prompt Counter NMTUI_ATTEMPTS tracks session count + logs in success message so operator sees how many tries it took. Composes with .claude/rules/non-coercion-invariant.md HC-8: operator agency preserved (drop-to-shell available; choice is operator-driven). Composes with .claude/rules/substrate-or-it-didnt-happen.md: substrate- honest UX matches operator's real-world Esc-to-refresh expectation. Bash syntax verified clean via bash -n. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) --- .../usb-nixos-installer/zeta-first-boot.sh | 61 +++++++++++++++---- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/full-ai-cluster/usb-nixos-installer/zeta-first-boot.sh b/full-ai-cluster/usb-nixos-installer/zeta-first-boot.sh index 4cad931668..a31cc8e4d1 100644 --- a/full-ai-cluster/usb-nixos-installer/zeta-first-boot.sh +++ b/full-ai-cluster/usb-nixos-installer/zeta-first-boot.sh @@ -109,10 +109,25 @@ else echo echo "[2/3] No ethernet internet detected. Launching wifi setup (nmtui)." echo " After connecting, quit nmtui to continue install." + echo " Esc out without connecting → nmtui re-launches to refresh scan." echo read -n 1 -s -t 5 -p " Press any key to launch nmtui (or wait 5s) ..." || true echo echo + # B-0832 nmtui auto-relaunch-on-no-internet loop (operator 2026-05-26): + # + # Old behavior: launch nmtui once; if no internet on exit → drop_to_shell. + # That broke the install flow when operator hit Esc to refresh the wifi + # scan (empirical 2026-05-26 1st USB physical-test session — see B-0832). + # + # New behavior: loop nmtui until either (a) has_internet succeeds OR + # (b) operator explicitly requests shell-drop via 's' keystroke. Esc + # out of nmtui without connecting just re-launches nmtui (refresh- + # friendly UX — operator can Esc + re-scan as needed). + # + # Per operator 2026-05-26: "i want to be able to refresh the network + # withing breaking the script". This loop is the substrate-honest fix. + # # nmtui returns 0 on quit regardless of whether connection succeeded. # Absolute path: defense-in-depth alongside the systemd unit's # environment.PATH override (set in configuration.nix on @@ -120,18 +135,40 @@ else # Both defenses together fix B-0754 iteration-1 'nmtui: command not # found' (nmtui IS installed in the ISO via networkmanager in # systemPackages; the issue was PATH inheritance into the unit). - if ! 
/run/current-system/sw/bin/nmtui; then - echo "[zeta-first-boot] nmtui failed." - drop_to_shell - fi - # Give NetworkManager a moment to actually establish + DHCP - sleep 3 - if ! has_internet; then - echo "[zeta-first-boot] No internet after nmtui. Check connection and" - echo " re-run zeta-install $HOST when network is up." - drop_to_shell - fi - echo " wifi ok" + NMTUI_ATTEMPTS=0 + while true; do + NMTUI_ATTEMPTS=$((NMTUI_ATTEMPTS + 1)) + if ! /run/current-system/sw/bin/nmtui; then + echo "[zeta-first-boot] nmtui failed to launch (attempt ${NMTUI_ATTEMPTS})." + drop_to_shell + fi + # Give NetworkManager a moment to actually establish + DHCP + sleep 3 + if has_internet; then + echo " wifi ok (after ${NMTUI_ATTEMPTS} nmtui session(s))" + break + fi + # No internet yet — give operator the choice to retry or escape + echo + echo "[zeta-first-boot] No internet after nmtui session ${NMTUI_ATTEMPTS}." + echo " Press 's' within 10s to drop to shell, OR" + echo " press any other key (or wait) to re-launch nmtui" + echo " to refresh the wifi scan." + echo + CHOICE="" + read -n 1 -s -t 10 -p " > " CHOICE || true + echo + case "$CHOICE" in + s|S) + echo "[zeta-first-boot] Dropping to shell at operator request." + drop_to_shell + ;; + *) + echo "[zeta-first-boot] Re-launching nmtui for refresh ..." + echo + ;; + esac + done fi echo From 54980ec4cb503169c80bfbc6f3357c0b2119b088 Mon Sep 17 00:00:00 2001 From: Lior Date: Tue, 26 May 2026 22:08:08 -0400 Subject: [PATCH 2/2] fix(#5378 Copilot): parameterize NMTUI_RETRY_PROMPT_SECS (was hard-coded 10s); mirrors ROLE_PROMPT_SECS env-override pattern + adds -r to new read Copilot finding on PR #5378 line 160: > The retry prompt hard-codes the timeout value ('within 10s' in the > message and 'read ... -t 10' in code). This can drift if the timeout > is adjusted later. Consider introducing a single NMTUI_RETRY_PROMPT_SECS > (similar to ROLE_PROMPT_SECS) and use it in both the message + read. Substrate-honest finding. Fix: 1. 
Add NMTUI_RETRY_PROMPT_SECS=${NMTUI_RETRY_PROMPT_SECS:-10} env- overridable var near ROLE_PROMPT_SECS (consistent pattern) 2. Use $NMTUI_RETRY_PROMPT_SECS in the user-facing 'within Xs' message 3. Use 'read -r -t "${NMTUI_RETRY_PROMPT_SECS}"' in the actual read 4. Added -r flag to the read (ShellCheck Information-level diagnostic on the read I added in the prior commit; hygiene improvement) Now operator can tune via /etc/zeta-firstboot.conf: echo 'NMTUI_RETRY_PROMPT_SECS=30' >> /etc/zeta-firstboot.conf Bash syntax verified clean via bash -n. --- full-ai-cluster/usb-nixos-installer/zeta-first-boot.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/full-ai-cluster/usb-nixos-installer/zeta-first-boot.sh b/full-ai-cluster/usb-nixos-installer/zeta-first-boot.sh index a31cc8e4d1..df01acd50e 100644 --- a/full-ai-cluster/usb-nixos-installer/zeta-first-boot.sh +++ b/full-ai-cluster/usb-nixos-installer/zeta-first-boot.sh @@ -31,6 +31,11 @@ HOST="${HOST:-control-plane}" REPO_URL="${REPO_URL:-https://github.com/Lucent-Financial-Group/Zeta}" ETHERNET_WAIT_SECS="${ETHERNET_WAIT_SECS:-30}" ROLE_PROMPT_SECS="${ROLE_PROMPT_SECS:-10}" +# B-0832 nmtui retry-prompt timeout — operator window to press 's' for +# shell-drop OR any other key (or wait) for nmtui re-launch. Mirrors +# the ROLE_PROMPT_SECS env-override pattern so the timeout is tunable +# without source edits. +NMTUI_RETRY_PROMPT_SECS="${NMTUI_RETRY_PROMPT_SECS:-10}" # ── Role pick: 10-sec single-keystroke prompt ───────────────────────── # Defaults to whatever the ISO's /etc/zeta-firstboot.conf shipped with @@ -151,12 +156,12 @@ else # No internet yet — give operator the choice to retry or escape echo echo "[zeta-first-boot] No internet after nmtui session ${NMTUI_ATTEMPTS}." - echo " Press 's' within 10s to drop to shell, OR" + echo " Press 's' within ${NMTUI_RETRY_PROMPT_SECS}s to drop to shell, OR" echo " press any other key (or wait) to re-launch nmtui" echo " to refresh the wifi scan." 
echo CHOICE="" - read -n 1 -s -t 10 -p " > " CHOICE || true + read -r -n 1 -s -t "${NMTUI_RETRY_PROMPT_SECS}" -p " > " CHOICE || true echo case "$CHOICE" in s|S)