From a6a0582cb51dbe02057736ca1426f6f45a6aa564 Mon Sep 17 00:00:00 2001 From: Rasmus Widing Date: Mon, 27 Apr 2026 12:42:41 +0300 Subject: [PATCH 1/2] chore(workflows): group all smoke-test workflows under .archon/workflows/test-workflows/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the 7 existing e2e-*.yaml smoke tests plus the new e2e-minimax-smoke test into a dedicated subfolder. Subfolder grouping is supported by the workflow loader (1 level deep, resolution by filename) so workflow names are unchanged. Mirrors the .archon/workflows/maintainer/ split landing in #1430. Also adds e2e-minimax-smoke.yaml — a sanity check that Pi correctly routes to Minimax M2.7 via the user's local pi auth, and that Pi's best-effort output_format parser handles a small nested schema. Asserts routing by reading the most recent Pi session jsonl rather than asking the model to self-identify (LLMs are unreliable narrators about their own identity, especially when Pi's system prompt mentions other providers as defaults). 
--- .../e2e-claude-smoke.yaml | 0 .../{ => test-workflows}/e2e-codex-smoke.yaml | 0 .../e2e-deterministic.yaml | 0 .../test-workflows/e2e-minimax-smoke.yaml | 106 ++++++++++++++++++ .../e2e-mixed-providers.yaml | 0 .../e2e-pi-all-nodes-smoke.yaml | 0 .../{ => test-workflows}/e2e-pi-smoke.yaml | 0 .../e2e-worktree-disabled.yaml | 0 8 files changed, 106 insertions(+) rename .archon/workflows/{ => test-workflows}/e2e-claude-smoke.yaml (100%) rename .archon/workflows/{ => test-workflows}/e2e-codex-smoke.yaml (100%) rename .archon/workflows/{ => test-workflows}/e2e-deterministic.yaml (100%) create mode 100644 .archon/workflows/test-workflows/e2e-minimax-smoke.yaml rename .archon/workflows/{ => test-workflows}/e2e-mixed-providers.yaml (100%) rename .archon/workflows/{ => test-workflows}/e2e-pi-all-nodes-smoke.yaml (100%) rename .archon/workflows/{ => test-workflows}/e2e-pi-smoke.yaml (100%) rename .archon/workflows/{ => test-workflows}/e2e-worktree-disabled.yaml (100%) diff --git a/.archon/workflows/e2e-claude-smoke.yaml b/.archon/workflows/test-workflows/e2e-claude-smoke.yaml similarity index 100% rename from .archon/workflows/e2e-claude-smoke.yaml rename to .archon/workflows/test-workflows/e2e-claude-smoke.yaml diff --git a/.archon/workflows/e2e-codex-smoke.yaml b/.archon/workflows/test-workflows/e2e-codex-smoke.yaml similarity index 100% rename from .archon/workflows/e2e-codex-smoke.yaml rename to .archon/workflows/test-workflows/e2e-codex-smoke.yaml diff --git a/.archon/workflows/e2e-deterministic.yaml b/.archon/workflows/test-workflows/e2e-deterministic.yaml similarity index 100% rename from .archon/workflows/e2e-deterministic.yaml rename to .archon/workflows/test-workflows/e2e-deterministic.yaml diff --git a/.archon/workflows/test-workflows/e2e-minimax-smoke.yaml b/.archon/workflows/test-workflows/e2e-minimax-smoke.yaml new file mode 100644 index 0000000000..5c33e75e7d --- /dev/null +++ b/.archon/workflows/test-workflows/e2e-minimax-smoke.yaml @@ -0,0 +1,106 @@ +# 
E2E smoke test — Minimax M2.7 via the Pi community provider +# Verifies: Pi can resolve and call Minimax M2.7 using the user's local +# `pi /login` credentials (api_key entry in ~/.pi/agent/auth.json). +# Design: mirrors e2e-pi-smoke.yaml structure. Three nodes verify +# (1) the model responds at all, (2) it answers an identity prompt (content not asserted), +# (3) it can produce parseable JSON via output_format (best-effort on Pi). +# The final bash node fails fast on empty outputs or unconfirmed minimax routing. +# Auth: requires a `minimax` entry in ~/.pi/agent/auth.json. No env vars. +name: e2e-minimax-smoke +description: | + Use when: Verifying that Minimax M2.7 loads via the Pi provider with the + user's local Pi auth (api_key in ~/.pi/agent/auth.json). + Triggers: "minimax smoke", "test minimax", "verify minimax", "minimax test". + Does: Sends three tiny prompts to Minimax M2.7 (math, self-identification, + structured JSON), asserts non-empty output and basic plausibility. + NOT for: Production work — connectivity / capability sanity check only. + +provider: pi +model: minimax/MiniMax-M2.7 + +worktree: + enabled: false # Smoke test — no need to isolate + +nodes: + # 1. Connectivity — does Pi resolve the model and stream a response? + - id: hello + prompt: 'What is 2+2? Answer with just the number, nothing else.' + allowed_tools: [] + effort: low + idle_timeout: 60000 + + # 2. Self-identification — INFORMATIONAL ONLY. Do not assert on the answer's content. + # LLMs are unreliable narrators about their own identity, and Pi's system + # prompt mentions OpenAI-codex defaults, which causes Minimax (and likely + # other models) to pattern-match and claim that identity. The real proof + # of routing is in Pi's session jsonl (provider=minimax, real billing). + - id: identify + prompt: 'Without using any tools, on a single short line, tell me which model and provider you are.' + allowed_tools: [] + idle_timeout: 60000 + depends_on: [hello] + + # 3. 
Structured output — exercises Pi's best-effort output_format path + # (schema appended to prompt + JSON extracted from result text). + # This is the same machinery the maintainer-standup synthesis relies on. + - id: json + prompt: | + Return a JSON object with two fields, no fences and no prose: + - "name": your model name (string) + - "ok": always true (boolean) + allowed_tools: [] + idle_timeout: 60000 + depends_on: [hello] + output_format: + type: object + properties: + name: + type: string + ok: + type: boolean + required: [name, ok] + + # 4. Assertions — fail loudly on empty / unparseable node output or unconfirmed minimax routing. + - id: assert + depends_on: [hello, identify, json] + bash: | + math="$hello.output" + ident="$identify.output" + jname="$json.output.name" + jok="$json.output.ok" + + echo "── results ──" + echo "math = $math" + echo "identify = $ident" + echo "json.name = $jname" + echo "json.ok = $jok" + echo "──────────────" + + if [ -z "$math" ] || [ -z "$ident" ]; then + echo "FAIL: empty output from hello or identify node" + exit 1 + fi + if [ -z "$jname" ] || [ -z "$jok" ]; then + echo "FAIL: structured-output fields missing — Pi best-effort JSON parse failed" + exit 1 + fi + + # Real proof of routing: Pi writes a session jsonl per call. Find + # ANY session jsonl modified in the last 3 minutes (the just-completed + # workflow's session) and confirm its model_change event names + # provider=minimax, modelId=MiniMax-M2.7. Avoids brittle path-encoding + # assumptions about how Pi names per-cwd session directories. 
+ recent_session=$(find "$HOME/.pi/agent/sessions" -name '*.jsonl' -mmin -3 -print 2>/dev/null | head -1) + if [ -z "$recent_session" ]; then + echo "FAIL: no Pi session jsonl modified in the last 3 minutes" + exit 1 + fi + if grep -q '"provider":"minimax".*"modelId":"MiniMax-M2.7"' "$recent_session"; then + echo "PASS: Pi session log confirms provider=minimax, modelId=MiniMax-M2.7" + echo " session: $recent_session" + else + echo "FAIL: Pi session log did not confirm minimax routing — possible misroute" + echo " session: $recent_session" + exit 1 + fi + echo "PASS: smoke complete" diff --git a/.archon/workflows/e2e-mixed-providers.yaml b/.archon/workflows/test-workflows/e2e-mixed-providers.yaml similarity index 100% rename from .archon/workflows/e2e-mixed-providers.yaml rename to .archon/workflows/test-workflows/e2e-mixed-providers.yaml diff --git a/.archon/workflows/e2e-pi-all-nodes-smoke.yaml b/.archon/workflows/test-workflows/e2e-pi-all-nodes-smoke.yaml similarity index 100% rename from .archon/workflows/e2e-pi-all-nodes-smoke.yaml rename to .archon/workflows/test-workflows/e2e-pi-all-nodes-smoke.yaml diff --git a/.archon/workflows/e2e-pi-smoke.yaml b/.archon/workflows/test-workflows/e2e-pi-smoke.yaml similarity index 100% rename from .archon/workflows/e2e-pi-smoke.yaml rename to .archon/workflows/test-workflows/e2e-pi-smoke.yaml diff --git a/.archon/workflows/e2e-worktree-disabled.yaml b/.archon/workflows/test-workflows/e2e-worktree-disabled.yaml similarity index 100% rename from .archon/workflows/e2e-worktree-disabled.yaml rename to .archon/workflows/test-workflows/e2e-worktree-disabled.yaml From 9482621415b3ca2e541041f325f695ab0ec917b4 Mon Sep 17 00:00:00 2001 From: Rasmus Widing Date: Mon, 27 Apr 2026 13:30:49 +0300 Subject: [PATCH 2/2] fix(e2e-minimax-smoke): address CodeRabbit review on #1431 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Widen find window from -mmin -3 to -mmin -10. 
The smoke's three Pi nodes plus the assert can collectively run several minutes on slow networks; 3 minutes was tight enough to false-FAIL on a healthy run. (CodeRabbit minor) - Drop non-deterministic `head -1` over `find` output. find doesn't guarantee any order; on a tie, the wrong file would be picked. Now iterates all matching sessions and breaks on first one carrying the routing signal — any match is sufficient evidence. (CodeRabbit minor) - Replace single-regex `'"provider":"minimax".*"modelId":"MiniMax-M2.7"'` with two separate greps joined by `&&`. JSON field order isn't part of Pi's contract; a future Pi release reordering `provider` and `modelId` in the model_change event would silently false-FAIL the original pattern. The new check is order-independent. (CodeRabbit major) --- .../test-workflows/e2e-minimax-smoke.yaml | 44 ++++++++++++++----- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/.archon/workflows/test-workflows/e2e-minimax-smoke.yaml b/.archon/workflows/test-workflows/e2e-minimax-smoke.yaml index 5c33e75e7d..eefae0d35a 100644 --- a/.archon/workflows/test-workflows/e2e-minimax-smoke.yaml +++ b/.archon/workflows/test-workflows/e2e-minimax-smoke.yaml @@ -85,22 +85,42 @@ nodes: exit 1 fi - # Real proof of routing: Pi writes a session jsonl per call. Find - # ANY session jsonl modified in the last 3 minutes (the just-completed - # workflow's session) and confirm its model_change event names - # provider=minimax, modelId=MiniMax-M2.7. Avoids brittle path-encoding - # assumptions about how Pi names per-cwd session directories. - recent_session=$(find "$HOME/.pi/agent/sessions" -name '*.jsonl' -mmin -3 -print 2>/dev/null | head -1) - if [ -z "$recent_session" ]; then - echo "FAIL: no Pi session jsonl modified in the last 3 minutes" + # Real proof of routing: Pi writes a session jsonl per call. 
Find ALL + # session jsonls modified in the last 10 minutes (generous window — + # smoke's three Pi nodes + assert can collectively take several + # minutes on a slow network; capped at 10 to avoid matching old runs). + # Check each for the minimax routing signal — any one matching is + # sufficient evidence. This avoids: + # - brittle path-encoding assumptions about Pi's per-cwd session dir, + # - non-deterministic `head -1` over `find` output (find doesn't + # guarantee any order), + # - JSON field-order brittleness in a single combined regex + # (`provider` may appear before or after `modelId` in the jsonl). + recent_sessions=$(find "$HOME/.pi/agent/sessions" -name '*.jsonl' -mmin -10 -print 2>/dev/null) + if [ -z "$recent_sessions" ]; then + echo "FAIL: no Pi session jsonl modified in the last 10 minutes" exit 1 fi - if grep -q '"provider":"minimax".*"modelId":"MiniMax-M2.7"' "$recent_session"; then + + matched="" + while IFS= read -r session; do + # Two separate fixed-string greps (-F) for order-independence — JSON + # field ordering isn't part of Pi's contract, and with -F the `.` in + # `M2.7` matches literally instead of acting as a regex wildcard. + if grep -qF '"provider":"minimax"' "$session" \ + && grep -qF '"modelId":"MiniMax-M2.7"' "$session"; then + matched="$session" + break + fi + done <<< "$recent_sessions" + + if [ -n "$matched" ]; then echo "PASS: Pi session log confirms provider=minimax, modelId=MiniMax-M2.7" - echo " session: $recent_session" + echo " session: $matched" else - echo "FAIL: Pi session log did not confirm minimax routing — possible misroute" - echo " session: $recent_session" + echo "FAIL: no recent Pi session log confirmed minimax routing — possible misroute" + echo " checked sessions:" + echo "$recent_sessions" | sed 's/^/ /' exit 1 fi echo "PASS: smoke complete"