diff --git a/.archon/workflows/e2e-claude-smoke.yaml b/.archon/workflows/test-workflows/e2e-claude-smoke.yaml
similarity index 100%
rename from .archon/workflows/e2e-claude-smoke.yaml
rename to .archon/workflows/test-workflows/e2e-claude-smoke.yaml
diff --git a/.archon/workflows/e2e-codex-smoke.yaml b/.archon/workflows/test-workflows/e2e-codex-smoke.yaml
similarity index 100%
rename from .archon/workflows/e2e-codex-smoke.yaml
rename to .archon/workflows/test-workflows/e2e-codex-smoke.yaml
diff --git a/.archon/workflows/e2e-deterministic.yaml b/.archon/workflows/test-workflows/e2e-deterministic.yaml
similarity index 100%
rename from .archon/workflows/e2e-deterministic.yaml
rename to .archon/workflows/test-workflows/e2e-deterministic.yaml
diff --git a/.archon/workflows/test-workflows/e2e-minimax-smoke.yaml b/.archon/workflows/test-workflows/e2e-minimax-smoke.yaml
new file mode 100644
index 0000000000..eefae0d35a
--- /dev/null
+++ b/.archon/workflows/test-workflows/e2e-minimax-smoke.yaml
@@ -0,0 +1,126 @@
+# E2E smoke test — Minimax M2.7 via the Pi community provider
+# Verifies: Pi can resolve and call Minimax M2.7 using the user's local
+#   `pi /login` credentials (api_key entry in ~/.pi/agent/auth.json).
+# Design: mirrors e2e-pi-smoke.yaml structure. Three nodes verify
+#   (1) the model responds at all, (2) it can self-identify as Minimax,
+#   (3) it can produce parseable JSON via output_format (best-effort on Pi).
+#   The final bash node fails fast if any signal is missing.
+# Auth: requires a `minimax` entry in ~/.pi/agent/auth.json. No env vars.
+name: e2e-minimax-smoke
+description: |
+  Use when: Verifying that Minimax M2.7 loads via the Pi provider with the
+  user's local Pi auth (api_key in ~/.pi/agent/auth.json).
+  Triggers: "minimax smoke", "test minimax", "verify minimax", "minimax test".
+  Does: Sends three tiny prompts to Minimax M2.7 (math, self-identification,
+  structured JSON), asserts non-empty output and basic plausibility.
+  NOT for: Production work — connectivity / capability sanity check only.
+
+provider: pi
+model: minimax/MiniMax-M2.7
+
+worktree:
+  enabled: false  # Smoke test — no need to isolate
+
+nodes:
+  # 1. Connectivity — does Pi resolve the model and stream a response?
+  - id: hello
+    prompt: 'What is 2+2? Answer with just the number, nothing else.'
+    allowed_tools: []
+    effort: low
+    idle_timeout: 60000
+
+  # 2. Self-identification — INFORMATIONAL ONLY. Do not assert on the result.
+  # LLMs are unreliable narrators about their own identity, and Pi's system
+  # prompt mentions OpenAI-codex defaults, which causes Minimax (and likely
+  # other models) to pattern-match and claim that identity. The real proof
+  # of routing is in Pi's session jsonl (provider=minimax, real billing).
+  - id: identify
+    prompt: 'Without using any tools, on a single short line, tell me which model and provider you are.'
+    allowed_tools: []
+    idle_timeout: 60000
+    depends_on: [hello]
+
+  # 3. Structured output — exercises Pi's best-effort output_format path
+  # (schema appended to prompt + JSON extracted from result text).
+  # This is the same machinery the maintainer-standup synthesis relies on.
+  - id: json
+    prompt: |
+      Return a JSON object with two fields, no fences and no prose:
+      - "name": your model name (string)
+      - "ok": always true (boolean)
+    allowed_tools: []
+    idle_timeout: 60000
+    depends_on: [hello]
+    output_format:
+      type: object
+      properties:
+        name:
+          type: string
+        ok:
+          type: boolean
+      required: [name, ok]
+
+  # 4. Assertions — fail loudly if any node returned empty / unparseable.
+  - id: assert
+    depends_on: [hello, identify, json]
+    bash: |
+      math="$hello.output"
+      ident="$identify.output"
+      jname="$json.output.name"
+      jok="$json.output.ok"
+
+      echo "── results ──"
+      echo "math = $math"
+      echo "identify = $ident"
+      echo "json.name = $jname"
+      echo "json.ok = $jok"
+      echo "──────────────"
+
+      if [ -z "$math" ] || [ -z "$ident" ]; then
+        echo "FAIL: empty output from hello or identify node"
+        exit 1
+      fi
+      if [ -z "$jname" ] || [ -z "$jok" ]; then
+        echo "FAIL: structured-output fields missing — Pi best-effort JSON parse failed"
+        exit 1
+      fi
+
+      # Real proof of routing: Pi writes a session jsonl per call. Find ALL
+      # session jsonls modified in the last 10 minutes (generous window —
+      # smoke's three Pi nodes + assert can collectively take several
+      # minutes on a slow network; capped at 10 to avoid matching old runs).
+      # Check each for the minimax routing signal — any one matching is
+      # sufficient evidence. This avoids:
+      #   - brittle path-encoding assumptions about Pi's per-cwd session dir,
+      #   - non-deterministic `head -1` over `find` output (find doesn't
+      #     guarantee any order),
+      #   - JSON field-order brittleness in a single combined regex
+      #     (`provider` may appear before or after `modelId` in the jsonl).
+      recent_sessions=$(find "$HOME/.pi/agent/sessions" -name '*.jsonl' -mmin -10 -print 2>/dev/null)
+      if [ -z "$recent_sessions" ]; then
+        echo "FAIL: no Pi session jsonl modified in the last 10 minutes"
+        exit 1
+      fi
+
+      matched=""
+      while IFS= read -r session; do
+        # Two separate greps: field order and whitespace around `:` aren't
+        # part of Pi's contract, so one combined regex (or an exact
+        # `"key":"value"` match) would false-FAIL if Pi's serializer changes.
+        if grep -Eq '"provider"[[:space:]]*:[[:space:]]*"minimax"' "$session" \
+          && grep -Eq '"modelId"[[:space:]]*:[[:space:]]*"MiniMax-M2\.7"' "$session"; then
+          matched="$session"
+          break
+        fi
+      done <<< "$recent_sessions"
+
+      if [ -n "$matched" ]; then
+        echo "PASS: Pi session log confirms provider=minimax, modelId=MiniMax-M2.7"
+        echo " session: $matched"
+      else
+        echo "FAIL: no recent Pi session log confirmed minimax routing — possible misroute"
+        echo " checked sessions:"
+        echo "$recent_sessions" | sed 's/^/ /'
+        exit 1
+      fi
+      echo "PASS: smoke complete"
diff --git a/.archon/workflows/e2e-mixed-providers.yaml b/.archon/workflows/test-workflows/e2e-mixed-providers.yaml
similarity index 100%
rename from .archon/workflows/e2e-mixed-providers.yaml
rename to .archon/workflows/test-workflows/e2e-mixed-providers.yaml
diff --git a/.archon/workflows/e2e-pi-all-nodes-smoke.yaml b/.archon/workflows/test-workflows/e2e-pi-all-nodes-smoke.yaml
similarity index 100%
rename from .archon/workflows/e2e-pi-all-nodes-smoke.yaml
rename to .archon/workflows/test-workflows/e2e-pi-all-nodes-smoke.yaml
diff --git a/.archon/workflows/e2e-pi-smoke.yaml b/.archon/workflows/test-workflows/e2e-pi-smoke.yaml
similarity index 100%
rename from .archon/workflows/e2e-pi-smoke.yaml
rename to .archon/workflows/test-workflows/e2e-pi-smoke.yaml
diff --git a/.archon/workflows/e2e-worktree-disabled.yaml b/.archon/workflows/test-workflows/e2e-worktree-disabled.yaml
similarity index 100%
rename from .archon/workflows/e2e-worktree-disabled.yaml
rename to .archon/workflows/test-workflows/e2e-worktree-disabled.yaml