diff --git a/.archon/workflows/test-workflows/e2e-copilot-all-features.yaml b/.archon/workflows/test-workflows/e2e-copilot-all-features.yaml new file mode 100644 index 0000000000..dc024ae36a --- /dev/null +++ b/.archon/workflows/test-workflows/e2e-copilot-all-features.yaml @@ -0,0 +1,314 @@ +# E2E features smoke — GitHub Copilot community provider +# Verifies (in one DAG, takes ~90-120s on Linux gpt-5-mini): +# 1. Basic chat round-trip (sessionResume-able shape). +# 2. effort: high → SDK reasoningEffort='high' translation. +# 3. denied_tools: [shell] → SDK excludedTools passthrough — model must +# respect the deny-list (no shell tool calls). +# 4. output_format JSON schema → best-effort structured output via prompt +# augmentation + transcript parse (Pi #1297 pattern, shared/structured-output). +# 5. nodeConfig.mcp → SessionConfig.mcpServers (env-expanded); model +# uses the @modelcontextprotocol/server-everything stdio MCP server. +# 6. nodeConfig.skills → SessionConfig.skillDirectories; staged SKILL.md +# provides the answer the model is asked to retrieve. +# 7. nodeConfig.agents → SessionConfig.customAgents; model is asked to +# delegate to an inline custom agent and surface its response. +# Auth: run `copilot login`, or set COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN. +# External deps: `npx` available on PATH (for the MCP server). Built-in on +# any Node install ≥ 14. +# Doubles as adoption docs — each scenario is documented inline. +name: e2e-copilot-all-features +description: 'Copilot provider feature smoke — chat + effort + tool restrictions + structured output + MCP + skills + agents.' +provider: copilot +model: gpt-5-mini + +worktree: + enabled: false # Smoke — no isolation needed. + +nodes: + # 1. Connectivity — does the SDK start, stream, and emit a result chunk? + - id: hello + prompt: 'Reply with exactly the word PONG and nothing else.' + idle_timeout: 60000 + + # 2. effort: high → reasoningEffort='high'. 
Asking a small arithmetic + # question that is trivial for the model but exercises the reasoning + # path (visible as a longer latency on a fresh session). + - id: reasoning + prompt: 'What is 17 multiplied by 23? Answer with exactly the integer, no prose.' + effort: high + idle_timeout: 90000 + depends_on: [hello] + + # 3. denied_tools — the model is asked to do something it would normally + # use the shell tool for. With shell denied, it must decline or fall + # back to inline reasoning. The assert below checks the model did NOT + # invoke the shell tool by inspecting the result text for a refusal / + # inline-only marker. + - id: tool_restricted + prompt: | + You have NO shell access. Without running any tools, reply with exactly: + DENIED_OK + denied_tools: [shell, write] + idle_timeout: 60000 + depends_on: [hello] + + # 4. output_format → best-effort JSON via shared/structured-output. + # Same pattern Pi #1297 uses: prompt-augmented with a JSON-only + # instruction, parsed at agent_end. + - id: structured + prompt: | + Return a JSON object with two fields, no fences, no prose: + - "model": string — the model alias you were called with + - "ok": boolean — always true + idle_timeout: 60000 + depends_on: [hello] + output_format: + type: object + properties: + model: + type: string + ok: + type: boolean + required: [model, ok] + + # 5. Stage a tiny SKILL.md so `skills_demo` has something to resolve. + # Staged at runtime (not committed) so the smoke skill stays scoped + # to this workflow and doesn't leak into other AI runs. + # + # Format mirrors the agentskills.io standard (see .claude/skills/* + # in this repo for working examples). The description carries an + # explicit "Use when..." trigger so Copilot's skill-metadata scan + # picks the right invocation; the body is short and unambiguous so + # once invoked the model has no room to paraphrase. 
+ - id: setup_skills_fixture + bash: | + set -e + mkdir -p .agents/skills/copilot-smoke + cat > .agents/skills/copilot-smoke/SKILL.md <<'SKILL_EOF' + --- + name: copilot-smoke + description: | + Returns a fixed smoke-test token. Use when the user asks for the + "copilot-smoke token", invokes the copilot-smoke skill directly, + or otherwise requests the literal token this skill provides. + Output is always the single token SK_a8f3kL2qZTOK on its own + line, with no prose, framing, or commentary. + --- + + # copilot-smoke + + This skill exists for one purpose: to verify that the Copilot + provider's `skillDirectories` wiring actually injects skill bodies + into the model's context end-to-end. It is invoked exclusively by + the `e2e-copilot-all-features` smoke test. + + ## Output (verbatim) + + ``` + SK_a8f3kL2qZTOK + ``` + + ## Behavior + + When invoked, you MUST respond with EXACTLY the line: + + SK_a8f3kL2qZTOK + + Nothing before. Nothing after. No quotation marks, no markdown + fences, no commentary, no description of what the skill does. + Just the bare token on its own line. + + The token is intentionally an unguessable random string — its + presence in your output is the only proof the SDK actually loaded + this SKILL.md body into the model's context (rather than just the + frontmatter description). + + Any deviation — paraphrasing, summarizing, narrating, asking + clarifying questions, or producing different text — is incorrect + behavior for this skill. + SKILL_EOF + echo "skill staged: .agents/skills/copilot-smoke/SKILL.md" + + # 6. Stage mcp.json pointing at the canonical + # @modelcontextprotocol/server-everything stdio MCP server. Server + # has built-in `add(a,b)` and `echo(message)` tools we can hit. 
+ - id: setup_mcp_fixture + bash: | + set -e + mkdir -p .archon/test-fixtures + cat > .archon/test-fixtures/copilot-mcp.json <<'MCP_EOF' + { + "everything": { + "type": "local", + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-everything", "stdio"], + "tools": ["*"] + } + } + MCP_EOF + echo "mcp config staged: .archon/test-fixtures/copilot-mcp.json" + + # 7. mcp_demo — model uses the `everything` server's `add` tool. + # Asserts via the model's response (5 = 2+3 should appear). Pino log + # line `copilot.mcp_loaded` (info level) is the deterministic proof + # that Archon translated nodeConfig.mcp → SessionConfig.mcpServers. + - id: mcp_demo + prompt: | + Use the MCP `add` tool from the `everything` server to compute 2 + 3. + Reply with EXACTLY the integer result (just "5"), no prose. + mcp: .archon/test-fixtures/copilot-mcp.json + idle_timeout: 90000 + depends_on: [setup_mcp_fixture] + + # 8. skills_demo — uses the staged copilot-smoke skill. The skill's + # SKILL.md tells the model exactly how to answer. Pino log line + # `copilot.skills_resolved` proves the path-resolution wiring; + # the prompt below explicitly invokes the skill so the SDK's + # progressive-disclosure layer loads the body (Copilot, like + # Claude, lazy-loads skill bodies on invocation rather than at + # session start, so a vague prompt sees only the description). + - id: skills_demo + prompt: | + Invoke the `copilot-smoke` skill from your active skill set and + follow its instructions verbatim. Respond with ONLY the literal + output the skill specifies — no prose, no framing, no quotation + marks, no markdown fences. + skills: [copilot-smoke] + idle_timeout: 60000 + depends_on: [setup_skills_fixture] + + # 9. agents_demo — defines an inline custom agent and asks the model + # to delegate to it via the Task tool. Pino log line + # `copilot.agents_registered` proves customAgents wiring. 
+ # + # Notes on the design: + # - Agent name `task-responder` (not "smoke-helper") so the model + # can't confuse it with the parallel `copilot-smoke` skill. + # - Agent marker is an unguessable random token so a model that + # merely confabulates a "plausible-sounding" marker will not + # match the assert. The token's presence in the response is the + # proof the agent's prompt actually reached the model. + # - Prompt explicitly tells the model to USE the Task tool. gpt-5- + # mini sometimes inlines the answer; either path is acceptable + # as long as the unguessable token surfaces. + - id: agents_demo + prompt: | + You have a sub-agent registered called `task-responder`. Use the + Task tool to invoke it with any input — the agent's instructions + will produce a fixed token. Surface the agent's exact literal + output in your final reply, with no other text. + agents: + task-responder: + description: Returns a fixed agent-side smoke token. Use exclusively when invoked by the e2e-copilot-all-features smoke test. + prompt: | + When invoked with anything, respond with EXACTLY this single + line and nothing else (no prose, no quotes, no fences): + + AG_n5k7HpT3wAGOK + + The token is an unguessable random string — its presence in + your output is the only proof the agent's `prompt` field was + actually loaded by the SDK. + idle_timeout: 90000 + depends_on: [hello] + + # 10. Assertions — fail loudly if any node returned empty / unparseable. + # Note on quoting: the workflow engine's variable substitution already + # shell-quotes node outputs for bash nodes (see #591 "Fix: shell injection + # via $nodeId.output in bash nodes" + dag-executor.ts substituteNodeOutputRefs). + # So we assign $nodeId.output BARE — the engine wraps the value in a + # properly escaped single-quoted string before bash sees it. 
+ - id: assert + depends_on: [hello, reasoning, tool_restricted, structured, mcp_demo, skills_demo, agents_demo] + trigger_rule: all_success + bash: | + hello_raw=$hello.output + reasoning_raw=$reasoning.output + restricted_raw=$tool_restricted.output + json_model=$structured.output.model + json_ok=$structured.output.ok + mcp_raw=$mcp_demo.output + skills_raw=$skills_demo.output + agents_raw=$agents_demo.output + + # Redirect to stderr (>&2) so the results table surfaces in terminal + # output even when the bash node succeeds — successful node stdout is + # captured silently as the node's output, only stderr is echoed live. + { + echo "── results ──" + printf 'hello = %s\n' "$hello_raw" + printf 'reasoning = %s\n' "$reasoning_raw" + printf 'restricted = %s\n' "$restricted_raw" + printf 'json.model = %s\n' "$json_model" + printf 'json.ok = %s\n' "$json_ok" + printf 'mcp_demo = %s\n' "$mcp_raw" + printf 'skills_demo = %s\n' "$skills_raw" + printf 'agents_demo = %s\n' "$agents_raw" + echo "──────────────" + } >&2 + + fail=0 + check_nonempty() { + if [ -z "$2" ]; then + printf 'FAIL: %s produced empty output\n' "$1" + fail=1 + fi >&2 + } + check_contains() { + local name="$1" + local needle="$2" + local haystack="$3" + if ! 
printf '%s\n' "$haystack" | grep -F -q -- "$needle"; then + printf 'FAIL: %s missing %s, got: %s\n' "$name" "$needle" "$haystack" + fail=1 + fi >&2 + } + + check_nonempty hello "$hello_raw" + check_nonempty reasoning "$reasoning_raw" + check_nonempty tool_restricted "$restricted_raw" + check_nonempty mcp_demo "$mcp_raw" + check_nonempty skills_demo "$skills_raw" + check_nonempty agents_demo "$agents_raw" + + check_contains hello "PONG" "$hello_raw" + check_contains reasoning "391" "$reasoning_raw" + check_contains tool_restricted "DENIED_OK" "$restricted_raw" + check_contains mcp_demo "5" "$mcp_raw" + check_contains skills_demo "SK_a8f3kL2qZTOK" "$skills_raw" + check_contains agents_demo "AG_n5k7HpT3wAGOK" "$agents_raw" + + + { + # structured-output JSON path access. Empty means the prompt- + # augmented JSON parse failed. + if [ -z "$json_model" ] || [ -z "$json_ok" ]; then + printf 'FAIL: structured-output fields missing — best-effort JSON parse failed\n' + fail=1 + fi + if [ "$json_ok" != "true" ]; then + printf 'FAIL: structured.json.ok != true (got: %s)\n' "$json_ok" + fail=1 + fi + + if [ "$fail" -eq 1 ]; then + echo "──────────────" + echo "FAIL: one or more capability checks failed" + exit 1 + fi + echo "PASS: all seven capabilities exercised end-to-end" + } >&2 + + # 11. cleanup — remove staged fixtures so the repo stays tidy regardless + # of pass/fail. trigger_rule: all_done means this runs even if the + # assert (or any AI node) failed. 
+ - id: cleanup + depends_on: [assert] + trigger_rule: all_done + bash: | + rm -rf .agents/skills/copilot-smoke + rm -f .archon/test-fixtures/copilot-mcp.json + rmdir .archon/test-fixtures 2>/dev/null || true + rmdir .agents/skills 2>/dev/null || true + rmdir .agents 2>/dev/null || true + echo "cleanup complete" diff --git a/.archon/workflows/test-workflows/e2e-copilot-smoke.yaml b/.archon/workflows/test-workflows/e2e-copilot-smoke.yaml new file mode 100644 index 0000000000..10accedf05 --- /dev/null +++ b/.archon/workflows/test-workflows/e2e-copilot-smoke.yaml @@ -0,0 +1,31 @@ +# E2E smoke test — GitHub Copilot community provider +# Verifies: provider registration, SDK session start, simple prompt response. +# Auth: run `copilot login`, or provide COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN. +name: e2e-copilot-smoke +description: 'Smoke test for the GitHub Copilot community provider.' +provider: copilot +model: gpt-5-mini + +nodes: + - id: simple + prompt: 'Reply with exactly COPILOT_OK' + idle_timeout: 60000 # gpt-5-mini occasionally pauses past the 30s default + + - id: assert + bash: | + # $simple.output is shell-quoted by the workflow engine before bash + # sees it (#591 + dag-executor.ts substituteNodeOutputRefs with + # escapedForBash=true), so assign bare — wrapping in additional + # quotes would double-wrap and break on outputs containing + # apostrophes or parens. 
+ output_raw=$simple.output + if [ -z "$output_raw" ]; then + echo "FAIL: simple node returned empty output" + exit 1 + fi + printf '%s\n' "$output_raw" | grep -F -q -- 'COPILOT_OK' || { + printf 'FAIL: expected COPILOT_OK, got: %s\n' "$output_raw" + exit 1 + } + printf 'PASS: simple=%s\n' "$output_raw" + depends_on: [simple] diff --git a/bun.lock b/bun.lock index 7f15ead093..72ca60ffd5 100644 --- a/bun.lock +++ b/bun.lock @@ -130,6 +130,7 @@ "dependencies": { "@anthropic-ai/claude-agent-sdk": "^0.2.121", "@archon/paths": "workspace:*", + "@github/copilot-sdk": "^0.2.2", "@mariozechner/pi-ai": "^0.67.5", "@mariozechner/pi-coding-agent": "^0.67.5", "@openai/codex-sdk": "^0.125.0", @@ -545,6 +546,22 @@ "@floating-ui/utils": ["@floating-ui/utils@0.2.11", "", {}, "sha512-RiB/yIh78pcIxl6lLMG0CgBXAZ2Y0eVHqMPYugu+9U0AeT6YBeiJpf7lbdJNIugFP5SIjwNRgo4DhR1Qxi26Gg=="], + "@github/copilot": ["@github/copilot@1.0.34", "", { "optionalDependencies": { "@github/copilot-darwin-arm64": "1.0.34", "@github/copilot-darwin-x64": "1.0.34", "@github/copilot-linux-arm64": "1.0.34", "@github/copilot-linux-x64": "1.0.34", "@github/copilot-win32-arm64": "1.0.34", "@github/copilot-win32-x64": "1.0.34" }, "bin": { "copilot": "npm-loader.js" } }, "sha512-jFYulj1v00b3j43Er9+WwhZ/XldGq7+gti2s2pRhrdPwYEd1PMvscDZwRa/1iUBz/XQ5HUGac1tD8P7+VUpWjg=="], + + "@github/copilot-darwin-arm64": ["@github/copilot-darwin-arm64@1.0.34", "", { "os": "darwin", "cpu": "arm64", "bin": { "copilot-darwin-arm64": "copilot" } }, "sha512-g94EhSLd3a6fckZ6xb/zP2DZJZEx7kONWdOoDiHXUtSqc4RiZ7OBq1EwT4WrPY1lsmy9sioJIcZSGzJd0C1M7Q=="], + + "@github/copilot-darwin-x64": ["@github/copilot-darwin-x64@1.0.34", "", { "os": "darwin", "cpu": "x64", "bin": { "copilot-darwin-x64": "copilot" } }, "sha512-tIgFEZV0ohCF/VgTODJWre3xURsvEd+6IPN/HPKWxG6AXtJOxzjlr5kLYYdPHdNlHNmSxGQw8fWsN2FZ4nyDdw=="], + + "@github/copilot-linux-arm64": ["@github/copilot-linux-arm64@1.0.34", "", { "os": "linux", "cpu": "arm64", "bin": { "copilot-linux-arm64": 
"copilot" } }, "sha512-feqjEetrlqBUhYskIsPmwACQOWO99cvRpKwIFl3OlEjWoj+//HA7yXh49UIe0gD8wQUI8hy05uVz3K2/xti2nQ=="], + + "@github/copilot-linux-x64": ["@github/copilot-linux-x64@1.0.34", "", { "os": "linux", "cpu": "x64", "bin": { "copilot-linux-x64": "copilot" } }, "sha512-3l0rZZqmceklHizJaaO+Iy2PsAZpVZS9Mn9VYnVcY/8Yzt4Y2hmXSFcKVfc4l+JlhFsPs7trhMdIkfwkjaKPLg=="], + + "@github/copilot-sdk": ["@github/copilot-sdk@0.2.2", "", { "dependencies": { "@github/copilot": "^1.0.21", "vscode-jsonrpc": "^8.2.1", "zod": "^4.3.6" } }, "sha512-VZCqS08YlUM90bUKJ7VLeIxgTTEHtfXBo84T1IUMNvXRREX2csjPH6Z+CPw3S2468RcCLvzBXcc9LtJJTLIWFw=="], + + "@github/copilot-win32-arm64": ["@github/copilot-win32-arm64@1.0.34", "", { "os": "win32", "cpu": "arm64", "bin": { "copilot-win32-arm64": "copilot.exe" } }, "sha512-06kEJO3iyohmAqF4iIbOxOfWLFSIpLDJ1L1oEHRtouMrH2Ll1wrUjsoQT1gXgBOv7rifl25qx/Avx5zKqvuORw=="], + + "@github/copilot-win32-x64": ["@github/copilot-win32-x64@1.0.34", "", { "os": "win32", "cpu": "x64", "bin": { "copilot-win32-x64": "copilot.exe" } }, "sha512-QLL8pS4q2TTyQbClEXxqXtQGPr4lk+pwc8hPMUL7iw7HGDOvs1WCLMT1ZSDPPcxSrTnR/dURX5za1NMA8uF/fw=="], + "@google/genai": ["@google/genai@1.50.1", "", { "dependencies": { "google-auth-library": "^10.3.0", "p-retry": "^4.6.2", "protobufjs": "^7.5.4", "ws": "^8.18.0" }, "peerDependencies": { "@modelcontextprotocol/sdk": "^1.25.2" }, "optionalPeers": ["@modelcontextprotocol/sdk"] }, "sha512-YbkX7H9+1Pt8wOt7DDREy8XSoiL6fRDzZQRyaVBarFf8MR3zHGqVdvM4cLbDXqPhxqvegZShgfxb8kw9C7YhAQ=="], "@grammyjs/types": ["@grammyjs/types@3.26.0", "", {}, "sha512-jlnyfxfev/2o68HlvAGRocAXgdPPX5QabG7jZlbqC2r9DZyWBfzTlg+nu3O3Fy4EhgLWu28hZ/8wr7DsNamP9A=="], @@ -2749,6 +2766,8 @@ "vitefu": ["vitefu@1.1.3", "", { "peerDependencies": { "vite": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0" }, "optionalPeers": ["vite"] }, "sha512-ub4okH7Z5KLjb6hDyjqrGXqWtWvoYdU3IGm/NorpgHncKoLTCfRIbvlhBm7r0YstIaQRYlp4yEbFqDcKSzXSSg=="], + "vscode-jsonrpc": ["vscode-jsonrpc@8.2.1", "", 
{}, "sha512-kdjOSJ2lLIn7r1rtrMbbNCHjyMPfRnowdKjBQ+mGq6NAW5QY2bEZC/khaC5OR8svbbjvLEaIXkOq45e2X9BIbQ=="], + "web-namespaces": ["web-namespaces@2.0.1", "", {}, "sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ=="], "web-streams-polyfill": ["web-streams-polyfill@3.3.3", "", {}, "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw=="], @@ -2851,6 +2870,22 @@ "@expressive-code/plugin-shiki/shiki": ["shiki@3.23.0", "", { "dependencies": { "@shikijs/core": "3.23.0", "@shikijs/engine-javascript": "3.23.0", "@shikijs/engine-oniguruma": "3.23.0", "@shikijs/langs": "3.23.0", "@shikijs/themes": "3.23.0", "@shikijs/types": "3.23.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4" } }, "sha512-55Dj73uq9ZXL5zyeRPzHQsK7Nbyt6Y10k5s7OjuFZGMhpp4r/rsLBH0o/0fstIzX1Lep9VxefWljK/SKCzygIA=="], + "@github/copilot-sdk/zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], + + "@img/sharp-darwin-arm64/@img/sharp-libvips-darwin-arm64": ["@img/sharp-libvips-darwin-arm64@1.2.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g=="], + + "@img/sharp-darwin-x64/@img/sharp-libvips-darwin-x64": ["@img/sharp-libvips-darwin-x64@1.2.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg=="], + + "@img/sharp-linux-arm/@img/sharp-libvips-linux-arm": ["@img/sharp-libvips-linux-arm@1.2.4", "", { "os": "linux", "cpu": "arm" }, "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A=="], + + "@img/sharp-linux-arm64/@img/sharp-libvips-linux-arm64": ["@img/sharp-libvips-linux-arm64@1.2.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw=="], + + 
"@img/sharp-linux-x64/@img/sharp-libvips-linux-x64": ["@img/sharp-libvips-linux-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw=="], + + "@img/sharp-linuxmusl-arm64/@img/sharp-libvips-linuxmusl-arm64": ["@img/sharp-libvips-linuxmusl-arm64@1.2.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw=="], + + "@img/sharp-linuxmusl-x64/@img/sharp-libvips-linuxmusl-x64": ["@img/sharp-libvips-linuxmusl-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg=="], + "@inquirer/core/wrap-ansi": ["wrap-ansi@6.2.0", "", { "dependencies": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", "strip-ansi": "^6.0.0" } }, "sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA=="], "@mariozechner/pi-ai/@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.90.0", "", { "dependencies": { "json-schema-to-ts": "^3.1.1" }, "peerDependencies": { "zod": "^3.25.0 || ^4.0.0" }, "optionalPeers": ["zod"], "bin": { "anthropic-ai-sdk": "bin/cli" } }, "sha512-MzZtPabJF1b0FTDl6Z6H5ljphPwACLGP13lu8MTiB8jXaW/YXlpOp+Po2cVou3MPM5+f5toyLnul9whKCy7fBg=="], diff --git a/packages/core/src/config/config-loader.ts b/packages/core/src/config/config-loader.ts index 4bf22d9144..3bd2e0109f 100644 --- a/packages/core/src/config/config-loader.ts +++ b/packages/core/src/config/config-loader.ts @@ -98,6 +98,7 @@ const SAFE_ASSISTANT_FIELDS: Record = { codex: ['model', 'modelReasoningEffort', 'webSearchMode'], // community providers — list each field we're confident is safe to // show in the web UI. Unknown providers fall through with no fields. 
+ copilot: ['model'], pi: ['model'], }; diff --git a/packages/docs-web/src/content/docs/getting-started/ai-assistants.md b/packages/docs-web/src/content/docs/getting-started/ai-assistants.md index de4004a6ba..362f71aba0 100644 --- a/packages/docs-web/src/content/docs/getting-started/ai-assistants.md +++ b/packages/docs-web/src/content/docs/getting-started/ai-assistants.md @@ -1,6 +1,6 @@ --- title: AI Assistants -description: Configure Claude Code, Codex, and Pi as AI assistants for Archon. +description: Configure Claude Code, Codex, GitHub Copilot, and Pi as AI assistants for Archon. category: getting-started area: clients audience: [user] @@ -9,7 +9,7 @@ sidebar: order: 4 --- -You must configure **at least one** AI assistant. All three can be configured and mixed within workflows. +You must configure **at least one** AI assistant. All four can be configured and mixed within workflows. ## Claude Code @@ -229,6 +229,84 @@ If you want Codex to be the default AI assistant for new conversations without c DEFAULT_AI_ASSISTANT=codex ``` +## GitHub Copilot (Community Provider) + +**SDK-backed community provider.** Archon's Copilot adapter uses `@github/copilot-sdk`, which drives the Copilot CLI over GitHub's supported JSON-RPC bridge instead of screen-scraping the interactive TUI. + +Copilot is registered as `builtIn: false` — like Pi, it is a bundled community provider rather than a core built-in. + +### Install + +For source installs, `bun install` pulls in the SDK and its bundled CLI dependency automatically. 
+ +For compiled Archon binaries, install the Copilot CLI yourself and point Archon at it if needed: + +```bash +# Any platform +npm install -g @github/copilot +``` + +Optional override paths: + +```ini +COPILOT_CLI_PATH=/absolute/path/to/copilot +``` + +```yaml +assistants: + copilot: + copilotCliPath: /absolute/path/to/copilot +``` + +### Authenticate + +Unlike Claude (OAuth subscription **or** independent `ANTHROPIC_API_KEY`) and Codex (OAuth **or** `OPENAI_API_KEY`), **GitHub Copilot has only one auth model: GitHub OAuth, billed through your Copilot subscription.** There is no PAT-style standalone Copilot credential — Copilot eligibility is bound to the OAuth token's user identity, not to an API key. + +What this means: the options below are different *delivery paths for the same OAuth token*, not separate auth schemes. + +| Path | When to use | Notes | +|---|---|---| +| `copilot login` | Local dev | Cached interactive OAuth. The SDK reads it via `useLoggedInUser: true`. | +| `COPILOT_GITHUB_TOKEN=` | CI / scripted | Explicit env. Useful when you've already obtained an OAuth token. | +| `GH_TOKEN=` | CI / scripted | Alternate env var the SDK accepts. | +| `GITHUB_TOKEN=` | CI / scripted | **GitHub Actions' workflow-scoped `${{ github.token }}` does NOT carry Copilot scope.** Use only a token with Copilot scope — typically `gh auth token` after `gh auth login --scopes copilot`. | + +When any of the three env vars is set, Archon passes the value to `CopilotClient({ githubToken })` and disables `useLoggedInUser`. Otherwise it leaves `useLoggedInUser: true` so the SDK reuses the cached `copilot login` credentials. + +Request-scoped env vars still win, so codebase env overrides work the same way they do for the other providers. 
+ +### Configuration Options + +```yaml +assistants: + copilot: + model: gpt-5-mini + # Optional: explicit Copilot CLI path + # copilotCliPath: /absolute/path/to/copilot + # Optional: override Copilot config dir + # configDir: /absolute/path/to/copilot-config + # Optional: allow Copilot to auto-discover repo MCP/skills + # enableConfigDiscovery: false +``` + +> **⚠️ Trust boundary.** `enableConfigDiscovery: true` lets the Copilot CLI/SDK load repo-level config (e.g. `.mcp.json`, `.vscode/mcp.json`, skill directories) directly, bypassing Archon's workflow validation surface. Only enable it for repositories you trust. Archon's default (`false`) keeps MCP/skills under explicit workflow control via `nodeConfig.mcp` and `nodeConfig.skills`. + +### Supported Archon Features + +| Feature | Support | Notes | +|---|---|---| +| Session resume | ✅ | Returns `sessionId` and reuses it on resume | +| Reasoning control | ✅ | `effort:` / string `thinking:` map to Copilot `reasoningEffort` | +| System prompt override | ✅ | `systemPrompt:` | +| Codebase env vars (`envInjection`) | ✅ | merged into the spawned Copilot CLI environment | +| Tool restrictions | ✅ | `allowed_tools` → `availableTools`, `denied_tools` → `excludedTools` (SDK enforces `availableTools` precedence when both are set) | +| MCP servers | ✅ | `mcp: path/to/servers.json` → `SessionConfig.mcpServers` (env vars `$FOO` expanded; missing vars warned) | +| Skills | ✅ | `skills: [name]` resolved from `.agents/skills/`, `.claude/skills/` (project or home) → `SessionConfig.skillDirectories` | +| Structured output | ✅ | best-effort: schema instruction appended to the prompt, assistant transcript parsed as JSON on completion (models that reliably follow instruction succeed; unparseable output degrades to the dag-executor's missing-output warning) | +| Sub-agents (`agents:`) | ✅ | `name`/`description`/`prompt`/`tools` (allowlist) map 1:1 to `SessionConfig.customAgents`; Claude-specific fields (`model`, `disallowedTools`, 
`skills`, `maxTurns`) warn per agent and are ignored | +| Fallback model | ❌ | not wired | +| Sandbox | ❌ | Copilot permissions are separate from Archon's sandbox surface | + ## Pi (Community Provider) **One adapter, ~20 LLM backends.** Pi (`@mariozechner/pi-coding-agent`) is a community-maintained coding-agent harness that Archon integrates as the first community provider. It unlocks Anthropic, OpenAI, Google (Gemini + Vertex), Groq, Mistral, Cerebras, xAI, OpenRouter, Hugging Face, and local inference (LM Studio, ollama, llamacpp, custom OpenAI-compatible endpoints registered in `~/.pi/agent/models.json`) under a single `provider: pi` entry. diff --git a/packages/providers/package.json b/packages/providers/package.json index d59911b9a6..1db65a237d 100644 --- a/packages/providers/package.json +++ b/packages/providers/package.json @@ -13,17 +13,19 @@ "./codex/provider": "./src/codex/provider.ts", "./codex/config": "./src/codex/config.ts", "./codex/binary-resolver": "./src/codex/binary-resolver.ts", + "./community/copilot": "./src/community/copilot/index.ts", "./community/pi": "./src/community/pi/index.ts", "./errors": "./src/errors.ts", "./registry": "./src/registry.ts" }, "scripts": { - "test": "bun test src/claude/provider.test.ts && bun test src/codex/provider.test.ts && bun test src/registry.test.ts && bun test src/codex/binary-guard.test.ts && bun test src/codex/binary-resolver.test.ts && bun test src/codex/binary-resolver-dev.test.ts && bun test src/claude/binary-resolver.test.ts && bun test src/claude/binary-resolver-dev.test.ts && bun test src/community/pi/model-ref.test.ts && bun test src/community/pi/config.test.ts && bun test src/community/pi/event-bridge.test.ts && bun test src/community/pi/options-translator.test.ts && bun test src/community/pi/session-resolver.test.ts && bun test src/community/pi/provider.test.ts && bun test src/community/pi/provider-lazy-load.test.ts", + "test": "bun test src/claude/provider.test.ts && bun test 
src/codex/provider.test.ts && bun test src/registry.test.ts && bun test src/codex/binary-guard.test.ts && bun test src/codex/binary-resolver.test.ts && bun test src/codex/binary-resolver-dev.test.ts && bun test src/claude/binary-resolver.test.ts && bun test src/claude/binary-resolver-dev.test.ts && bun test src/community/pi/model-ref.test.ts && bun test src/community/pi/config.test.ts && bun test src/community/pi/event-bridge.test.ts && bun test src/community/pi/options-translator.test.ts && bun test src/community/pi/session-resolver.test.ts && bun test src/community/pi/provider.test.ts && bun test src/community/pi/provider-lazy-load.test.ts && bun test src/community/copilot/config.test.ts && bun test src/community/copilot/binary-resolver.test.ts && bun test src/community/copilot/provider.test.ts && bun test src/community/copilot/tool-restrictions.test.ts && bun test src/community/copilot/mcp-translation.test.ts && bun test src/community/copilot/skills-translation.test.ts && bun test src/community/copilot/structured-output.test.ts && bun test src/community/copilot/provider-hardening.test.ts && bun test src/community/copilot/agents-translation.test.ts && bun test src/community/copilot/provider-lazy-load.test.ts", "type-check": "bun x tsc --noEmit" }, "dependencies": { "@anthropic-ai/claude-agent-sdk": "^0.2.121", "@archon/paths": "workspace:*", + "@github/copilot-sdk": "^0.2.2", "@mariozechner/pi-ai": "^0.67.5", "@mariozechner/pi-coding-agent": "^0.67.5", "@openai/codex-sdk": "^0.125.0", diff --git a/packages/providers/src/community/copilot/agents-translation.test.ts b/packages/providers/src/community/copilot/agents-translation.test.ts new file mode 100644 index 0000000000..9226d1f84e --- /dev/null +++ b/packages/providers/src/community/copilot/agents-translation.test.ts @@ -0,0 +1,214 @@ +import { beforeEach, describe, expect, mock, test } from 'bun:test'; + +const mockLogger = { + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => 
undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(function (this: unknown) { + return this; + }), + bindings: mock(() => ({ module: 'test' })), + isLevelEnabled: mock(() => true), + level: 'info', +}; + +mock.module('@archon/paths', () => ({ + createLogger: () => mockLogger, +})); + +mock.module('./binary-resolver', () => ({ + resolveCopilotCliPath: async () => '/usr/local/bin/copilot', +})); + +type SessionHandler = (event: Record) => void; + +let registeredHandlers: Record = {}; +const mockSession = { + sessionId: 'copilot-session-agents', + on: mock((eventType: string, handler: SessionHandler) => { + registeredHandlers[eventType] ??= []; + registeredHandlers[eventType].push(handler); + return () => undefined; + }), + sendAndWait: mock(async () => ({ data: { content: 'ok', messageId: 'm' } })), + abort: mock(async () => undefined), + disconnect: mock(async () => undefined), +}; + +const capturedSessionConfigs: Array> = []; +const mockCreateSession = mock(async (config: Record) => { + capturedSessionConfigs.push(config); + return mockSession; +}); + +mock.module('@github/copilot-sdk', () => ({ + approveAll: () => ({ kind: 'approved' }), + CopilotClient: class MockCopilotClient { + createSession = mockCreateSession; + resumeSession = mock(async () => mockSession); + stop = mock(async () => []); + }, +})); + +import { CopilotProvider } from './provider'; + +async function collect(generator: AsyncGenerator): Promise { + const chunks: unknown[] = []; + for await (const chunk of generator) chunks.push(chunk); + return chunks; +} + +describe('applyAgents', () => { + beforeEach(() => { + registeredHandlers = {}; + capturedSessionConfigs.length = 0; + mockCreateSession.mockClear(); + }); + + test('omits customAgents when nodeConfig.agents is absent', async () => { + await collect(new CopilotProvider().sendQuery('hi', '/repo', undefined, { model: 'gpt-5' })); + + 
expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + expect(cfg.customAgents).toBeUndefined(); + }); + + test('omits customAgents when agents is an empty object', async () => { + await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + model: 'gpt-5', + nodeConfig: { agents: {} }, + }) + ); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + expect(cfg.customAgents).toBeUndefined(); + }); + + test('maps name/description/prompt verbatim, passes tools allowlist', async () => { + await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + model: 'gpt-5', + nodeConfig: { + agents: { + 'code-searcher': { + description: 'Searches the repo for relevant code', + prompt: 'You are a code-search specialist. Be thorough.', + tools: ['read_file', 'grep'], + }, + }, + }, + }) + ); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + expect(cfg.customAgents).toEqual([ + { + name: 'code-searcher', + description: 'Searches the repo for relevant code', + prompt: 'You are a code-search specialist. 
Be thorough.', + tools: ['read_file', 'grep'], + }, + ]); + }); + + test('omits tools when not specified (Copilot treats undefined as "all")', async () => { + await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + nodeConfig: { + agents: { + 'free-agent': { + description: 'has all tools', + prompt: 'do anything', + }, + }, + }, + }) + ); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + const agents = cfg.customAgents as Array>; + expect(agents[0]).not.toHaveProperty('tools'); + }); + + test('emits one warning per agent listing ignored Claude-specific fields', async () => { + const chunks = await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + nodeConfig: { + agents: { + 'over-spec': { + description: 'has everything', + prompt: 'hello', + model: 'claude-sonnet-4.5', + disallowedTools: ['shell'], + skills: ['planning'], + maxTurns: 5, + }, + }, + }, + }) + ); + + const systemChunks = chunks.filter( + (c): c is { type: 'system'; content: string } => + typeof c === 'object' && c !== null && (c as { type?: string }).type === 'system' + ); + const match = systemChunks.find(c => c.content.includes("agent 'over-spec'")); + expect(match).toBeDefined(); + expect(match?.content).toContain('model'); + expect(match?.content).toContain('disallowedTools'); + expect(match?.content).toContain('skills'); + expect(match?.content).toContain('maxTurns'); + + // SessionConfig.customAgents still gets the agent with the supported fields only + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + const agents = cfg.customAgents as Array>; + expect(agents).toHaveLength(1); + expect(agents[0]).toEqual({ + name: 'over-spec', + description: 'has everything', + prompt: 'hello', + }); + }); + + test('maps multiple agents preserving key order', async () => { + await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + nodeConfig: { + agents: { 
+ 'first-one': { description: 'a', prompt: 'p1' }, + 'second-one': { description: 'b', prompt: 'p2' }, + }, + }, + }) + ); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + const agents = cfg.customAgents as Array<{ name: string }>; + expect(agents.map(a => a.name)).toEqual(['first-one', 'second-one']); + }); + + test('does NOT set SessionConfig.agent (Archon invokes sub-agents via Task tool)', async () => { + await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + nodeConfig: { + agents: { + 'only-agent': { description: 'x', prompt: 'y' }, + }, + }, + }) + ); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + expect(cfg.agent).toBeUndefined(); + }); +}); diff --git a/packages/providers/src/community/copilot/binary-resolver.test.ts b/packages/providers/src/community/copilot/binary-resolver.test.ts new file mode 100644 index 0000000000..55a4f7cb30 --- /dev/null +++ b/packages/providers/src/community/copilot/binary-resolver.test.ts @@ -0,0 +1,152 @@ +import { afterEach, beforeEach, describe, expect, mock, spyOn, test } from 'bun:test'; +import { chmodSync, mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +const mockLogger = { + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(function (this: unknown) { + return this; + }), + bindings: mock(() => ({ module: 'test' })), + isLevelEnabled: mock(() => true), + level: 'info', +}; + +// Per-test Archon home — mutated from beforeEach so each test run is isolated +// and never writes to a shared `/tmp/.archon`. 
+let archonHome = ''; + +mock.module('@archon/paths', () => ({ + BUNDLED_IS_BINARY: true, + getArchonHome: () => archonHome, + createLogger: () => mockLogger, +})); + +import * as resolver from './binary-resolver'; + +function writeExecutable(path: string): void { + writeFileSync(path, '#!/bin/sh\n'); + chmodSync(path, 0o755); +} + +let tmpRoot = ''; +let originalCopilotCliPath: string | undefined; + +describe('resolveCopilotCliPath', () => { + beforeEach(() => { + originalCopilotCliPath = process.env.COPILOT_CLI_PATH; + delete process.env.COPILOT_CLI_PATH; + tmpRoot = mkdtempSync(join(tmpdir(), 'copilot-bin-')); + archonHome = join(tmpRoot, 'archon-home'); + mkdirSync(archonHome, { recursive: true }); + }); + + afterEach(() => { + rmSync(tmpRoot, { recursive: true, force: true }); + if (originalCopilotCliPath === undefined) { + delete process.env.COPILOT_CLI_PATH; + } else { + process.env.COPILOT_CLI_PATH = originalCopilotCliPath; + } + }); + + test('uses env override when present', async () => { + const binaryPath = join(tmpRoot, 'copilot'); + writeExecutable(binaryPath); + process.env.COPILOT_CLI_PATH = binaryPath; + + await expect(resolver.resolveCopilotCliPath()).resolves.toBe(binaryPath); + }); + + test('throws when env override path is missing', async () => { + process.env.COPILOT_CLI_PATH = '/missing/copilot'; + + await expect(resolver.resolveCopilotCliPath()).rejects.toThrow('COPILOT_CLI_PATH'); + }); + + test('throws when env override path is a directory, not a file', async () => { + const dirPath = join(tmpRoot, 'copilot-dir'); + mkdirSync(dirPath, { recursive: true }); + process.env.COPILOT_CLI_PATH = dirPath; + + await expect(resolver.resolveCopilotCliPath()).rejects.toThrow('not an executable file'); + }); + + test('throws when env override path is not executable', async () => { + const nonExec = join(tmpRoot, 'copilot-noexec'); + writeFileSync(nonExec, '#!/bin/sh\n'); + chmodSync(nonExec, 0o644); + process.env.COPILOT_CLI_PATH = nonExec; + + // win32 
skips the exec-bit check — skip assertion there. + if (process.platform === 'win32') { + await expect(resolver.resolveCopilotCliPath()).resolves.toBe(nonExec); + return; + } + await expect(resolver.resolveCopilotCliPath()).rejects.toThrow('not an executable file'); + }); + + test('uses config override when present', async () => { + const binaryPath = join(tmpRoot, 'copilot'); + writeExecutable(binaryPath); + + await expect(resolver.resolveCopilotCliPath(binaryPath)).resolves.toBe(binaryPath); + }); + + test('uses vendor path in binary mode when available', async () => { + // Hermetic: stub the executable probe so the test does not depend on + // the real filesystem, the platform-specific binary name (`copilot.exe` + // on win32), or a system-installed Copilot CLI leaking in via PATH. + // Mirrors the sibling Codex resolver test. + const spy = spyOn(resolver, 'isExecutableFile').mockImplementation((path: string) => + path.replace(/\\/g, '/').includes('/vendor/copilot/') + ); + try { + const result = await resolver.resolveCopilotCliPath(); + expect(result).toBeDefined(); + expect(result!.replace(/\\/g, '/')).toContain('/vendor/copilot/'); + } finally { + spy.mockRestore(); + } + }); +}); + +describe('isExecutableFile', () => { + beforeEach(() => { + tmpRoot = mkdtempSync(join(tmpdir(), 'copilot-exec-')); + }); + + afterEach(() => { + rmSync(tmpRoot, { recursive: true, force: true }); + }); + + test('returns true for an executable file', () => { + const path = join(tmpRoot, 'copilot'); + writeExecutable(path); + expect(resolver.isExecutableFile(path)).toBe(true); + }); + + test('returns false for a missing path', () => { + expect(resolver.isExecutableFile(join(tmpRoot, 'nope'))).toBe(false); + }); + + test('returns false for a directory', () => { + const path = join(tmpRoot, 'a-dir'); + mkdirSync(path, { recursive: true }); + expect(resolver.isExecutableFile(path)).toBe(false); + }); + + test('returns false for a non-executable file on posix', () => { + if 
(process.platform === 'win32') return; + const path = join(tmpRoot, 'noexec'); + writeFileSync(path, '#!/bin/sh\n'); + chmodSync(path, 0o644); + expect(resolver.isExecutableFile(path)).toBe(false); + }); +}); diff --git a/packages/providers/src/community/copilot/binary-resolver.ts b/packages/providers/src/community/copilot/binary-resolver.ts new file mode 100644 index 0000000000..e5b63d13da --- /dev/null +++ b/packages/providers/src/community/copilot/binary-resolver.ts @@ -0,0 +1,138 @@ +import { + accessSync as _accessSync, + constants as fsConstants, + existsSync as _existsSync, + statSync as _statSync, +} from 'node:fs'; +import { join } from 'node:path'; +import { execFileSync } from 'node:child_process'; +import { BUNDLED_IS_BINARY, getArchonHome, createLogger } from '@archon/paths'; + +/** Wrapper for existsSync — enables spyOn in tests. */ +export function fileExists(path: string): boolean { + return _existsSync(path); +} + +/** + * True if `path` is a regular file and is executable by the current user. + * On win32, Node's stat.mode doesn't track Unix exec bits, so we fall back + * to "is a file" — which matches how `where` / `PATH` resolution works there. + */ +export function isExecutableFile(path: string): boolean { + try { + const stat = _statSync(path); + if (!stat.isFile()) return false; + if (process.platform === 'win32') return true; + // accessSync (X_OK) checks current-user executability — mode & 0o111 only + // proves *some* exec bit exists (e.g., mode 001 fails for the owner). + _accessSync(path, fsConstants.X_OK); + return true; + } catch { + return false; + } +} + +let cachedLog: ReturnType | undefined; +function getLog(): ReturnType { + if (!cachedLog) cachedLog = createLogger('copilot-binary'); + return cachedLog; +} + +const COPILOT_VENDOR_DIR = 'vendor/copilot'; + +function resolveFromPath(): string | undefined { + const whichCmd = process.platform === 'win32' ? 'where' : 'which'; + const executable = process.platform === 'win32' ? 
'copilot.exe' : 'copilot'; + + try { + const output = execFileSync(whichCmd, [executable], { + encoding: 'utf-8', + stdio: ['ignore', 'pipe', 'ignore'], + }).trim(); + const first = output.split(/\r?\n/)[0]?.trim(); + return first || undefined; + } catch { + return undefined; + } +} + +function getVendorBinaryName(): string | undefined { + if (!['darwin', 'linux', 'win32'].includes(process.platform)) return undefined; + if (process.arch !== 'x64' && process.arch !== 'arm64') return undefined; + return process.platform === 'win32' ? 'copilot.exe' : 'copilot'; +} + +const INSTALL_INSTRUCTIONS = + 'GitHub Copilot CLI was not found.\n\n' + + 'To fix, choose one of:\n' + + ' 1. Install globally: npm install -g @github/copilot\n' + + ' Then set: COPILOT_CLI_PATH=$(which copilot)\n\n' + + ' 2. Persist the path in ~/.archon/config.yaml:\n' + + ' assistants:\n' + + ' copilot:\n' + + ' copilotCliPath: /absolute/path/to/copilot\n\n' + + ' 3. Place the binary under ~/.archon/vendor/copilot/\n\n' + + ' 4. Or, if you are running from source, install @github/copilot-sdk deps with bun install.\n'; + +/** + * Resolve the Copilot CLI path. + * + * In dev mode, explicit env/config/path overrides are honored, otherwise the + * SDK can use its own bundled CLI from node_modules. + * + * In compiled Archon binaries, automatic node_modules resolution is unavailable, + * so we must resolve a real executable path or fail loudly. + */ +export async function resolveCopilotCliPath( + configCopilotCliPath?: string +): Promise { + const envPath = process.env.COPILOT_CLI_PATH; + if (envPath) { + if (!isExecutableFile(envPath)) { + throw new Error( + `COPILOT_CLI_PATH is set to "${envPath}" but it is not an executable file.\n` + + 'Please verify the path points to the Copilot CLI executable (chmod +x if needed).' 
+ ); + } + getLog().info({ binaryPath: envPath, source: 'env' }, 'copilot.binary_resolved'); + return envPath; + } + + if (configCopilotCliPath) { + if (!isExecutableFile(configCopilotCliPath)) { + throw new Error( + `assistants.copilot.copilotCliPath is set to "${configCopilotCliPath}" but it is not an executable file.\n` + + 'Please verify the path in .archon/config.yaml points to the Copilot CLI executable (chmod +x if needed).' + ); + } + getLog().info( + { binaryPath: configCopilotCliPath, source: 'config' }, + 'copilot.binary_resolved' + ); + return configCopilotCliPath; + } + + if (BUNDLED_IS_BINARY) { + const vendorBinaryName = getVendorBinaryName(); + if (vendorBinaryName) { + const vendorBinaryPath = join(getArchonHome(), COPILOT_VENDOR_DIR, vendorBinaryName); + if (isExecutableFile(vendorBinaryPath)) { + getLog().info( + { binaryPath: vendorBinaryPath, source: 'vendor' }, + 'copilot.binary_resolved' + ); + return vendorBinaryPath; + } + } + } + + const fromPath = resolveFromPath(); + if (fromPath && isExecutableFile(fromPath)) { + getLog().info({ binaryPath: fromPath, source: 'path' }, 'copilot.binary_resolved'); + return fromPath; + } + + if (!BUNDLED_IS_BINARY) return undefined; + + throw new Error(INSTALL_INSTRUCTIONS); +} diff --git a/packages/providers/src/community/copilot/capabilities.ts b/packages/providers/src/community/copilot/capabilities.ts new file mode 100644 index 0000000000..cf7339e8f9 --- /dev/null +++ b/packages/providers/src/community/copilot/capabilities.ts @@ -0,0 +1,23 @@ +import type { ProviderCapabilities } from '../../types'; + +/** + * Copilot capabilities are intentionally conservative. + * + * The SDK can do more than this provider currently exposes, but the flags here + * only describe behavior that is wired to Archon's existing workflow surface. 
+ */ +export const COPILOT_CAPABILITIES: ProviderCapabilities = { + sessionResume: true, + mcp: true, + hooks: false, + skills: true, + agents: true, + toolRestrictions: true, + structuredOutput: true, + envInjection: true, + costControl: false, + effortControl: true, + thinkingControl: true, + fallbackModel: false, + sandbox: false, +}; diff --git a/packages/providers/src/community/copilot/config.test.ts b/packages/providers/src/community/copilot/config.test.ts new file mode 100644 index 0000000000..9d287a2cef --- /dev/null +++ b/packages/providers/src/community/copilot/config.test.ts @@ -0,0 +1,37 @@ +import { describe, expect, test } from 'bun:test'; +import { parseCopilotConfig } from './config'; + +describe('parseCopilotConfig', () => { + test('parses supported fields', () => { + expect( + parseCopilotConfig({ + model: 'gpt-5', + copilotCliPath: '/usr/local/bin/copilot', + configDir: '/tmp/copilot', + enableConfigDiscovery: true, + useLoggedInUser: false, + logLevel: 'debug', + }) + ).toEqual({ + model: 'gpt-5', + copilotCliPath: '/usr/local/bin/copilot', + configDir: '/tmp/copilot', + enableConfigDiscovery: true, + useLoggedInUser: false, + logLevel: 'debug', + }); + }); + + test('drops invalid values silently', () => { + expect( + parseCopilotConfig({ + model: 123, + copilotCliPath: false, + configDir: null, + enableConfigDiscovery: 'yes', + useLoggedInUser: 'no', + logLevel: 'verbose', + } as unknown as Record) + ).toEqual({}); + }); +}); diff --git a/packages/providers/src/community/copilot/config.ts b/packages/providers/src/community/copilot/config.ts new file mode 100644 index 0000000000..2d74e3af6a --- /dev/null +++ b/packages/providers/src/community/copilot/config.ts @@ -0,0 +1,72 @@ +export interface CopilotProviderDefaults { + [key: string]: unknown; + /** Default Copilot model, e.g. `gpt-5` or `claude-sonnet-4.5`. */ + model?: string; + /** + * Path to the Copilot CLI executable. 
Useful in compiled Archon binaries or + * when the user wants to override the SDK's bundled CLI resolution. + */ + copilotCliPath?: string; + /** + * Override Copilot's config directory. + */ + configDir?: string; + /** + * Opt in to Copilot's config discovery from the repo. + * Disabled by default so arbitrary repos do not implicitly load MCP/skills. + */ + enableConfigDiscovery?: boolean; + /** + * Reuse the CLI's logged-in user credentials when no explicit token is + * provided. Defaults to true. + */ + useLoggedInUser?: boolean; + /** + * CLI log level. + */ + logLevel?: 'none' | 'error' | 'warning' | 'info' | 'debug' | 'all'; +} + +/** + * Lenient parser: fields with the wrong type (or `logLevel` outside the + * enumerated set) are silently dropped rather than throwing. Matches the + * fallback behavior of the other provider config loaders so a single bad + * key in `.archon/config.yaml` doesn't take the whole provider offline. See + * the `drops invalid values silently` test for the contract. 
+ */ +export function parseCopilotConfig(raw: Record): CopilotProviderDefaults { + const result: CopilotProviderDefaults = {}; + + if (typeof raw.model === 'string') { + result.model = raw.model; + } + + if (typeof raw.copilotCliPath === 'string') { + result.copilotCliPath = raw.copilotCliPath; + } + + if (typeof raw.configDir === 'string') { + result.configDir = raw.configDir; + } + + if (typeof raw.enableConfigDiscovery === 'boolean') { + result.enableConfigDiscovery = raw.enableConfigDiscovery; + } + + if (typeof raw.useLoggedInUser === 'boolean') { + result.useLoggedInUser = raw.useLoggedInUser; + } + + if ( + raw.logLevel === 'none' || + raw.logLevel === 'error' || + raw.logLevel === 'warning' || + raw.logLevel === 'info' || + raw.logLevel === 'debug' || + raw.logLevel === 'all' + ) { + result.logLevel = raw.logLevel; + } + + return result; +} diff --git a/packages/providers/src/community/copilot/index.ts b/packages/providers/src/community/copilot/index.ts new file mode 100644 index 0000000000..7afaf3945e --- /dev/null +++ b/packages/providers/src/community/copilot/index.ts @@ -0,0 +1,5 @@ +export { resolveCopilotCliPath } from './binary-resolver'; +export { COPILOT_CAPABILITIES } from './capabilities'; +export { parseCopilotConfig, type CopilotProviderDefaults } from './config'; +export { CopilotProvider } from './provider'; +export { registerCopilotProvider } from './registration'; diff --git a/packages/providers/src/community/copilot/mcp-translation.test.ts b/packages/providers/src/community/copilot/mcp-translation.test.ts new file mode 100644 index 0000000000..6db4d4d438 --- /dev/null +++ b/packages/providers/src/community/copilot/mcp-translation.test.ts @@ -0,0 +1,187 @@ +import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test'; +import { mkdtempSync, writeFileSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +const mockLogger = { + fatal: mock(() => undefined), + error: mock(() => 
undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(function (this: unknown) { + return this; + }), + bindings: mock(() => ({ module: 'test' })), + isLevelEnabled: mock(() => true), + level: 'info', +}; + +mock.module('@archon/paths', () => ({ + createLogger: () => mockLogger, +})); + +mock.module('./binary-resolver', () => ({ + resolveCopilotCliPath: async () => '/usr/local/bin/copilot', +})); + +type SessionHandler = (event: Record) => void; + +let registeredHandlers: Record = {}; +const mockSession = { + sessionId: 'copilot-session-mcp', + on: mock((eventType: string, handler: SessionHandler) => { + registeredHandlers[eventType] ??= []; + registeredHandlers[eventType].push(handler); + return () => undefined; + }), + sendAndWait: mock(async () => ({ + data: { content: 'ok', messageId: 'm' }, + })), + abort: mock(async () => undefined), + disconnect: mock(async () => undefined), +}; + +const capturedSessionConfigs: Array> = []; +const mockCreateSession = mock(async (config: Record) => { + capturedSessionConfigs.push(config); + return mockSession; +}); + +mock.module('@github/copilot-sdk', () => ({ + approveAll: () => ({ kind: 'approved' }), + CopilotClient: class MockCopilotClient { + createSession = mockCreateSession; + resumeSession = mock(async () => mockSession); + stop = mock(async () => []); + }, +})); + +import { CopilotProvider } from './provider'; + +async function collectChunks(generator: AsyncGenerator): Promise { + const chunks: unknown[] = []; + for await (const chunk of generator) chunks.push(chunk); + return chunks; +} + +let workDir = ''; +let originalCopilotMcpTestToken: string | undefined; + +describe('applyMcpServers', () => { + beforeEach(() => { + originalCopilotMcpTestToken = process.env.COPILOT_MCP_TEST_TOKEN; + registeredHandlers = {}; + capturedSessionConfigs.length = 0; + mockCreateSession.mockClear(); + workDir = mkdtempSync(join(tmpdir(), 
'copilot-mcp-')); + }); + + afterEach(() => { + rmSync(workDir, { recursive: true, force: true }); + if (originalCopilotMcpTestToken === undefined) { + delete process.env.COPILOT_MCP_TEST_TOKEN; + } else { + process.env.COPILOT_MCP_TEST_TOKEN = originalCopilotMcpTestToken; + } + }); + + test('omits mcpServers when nodeConfig.mcp is absent', async () => { + await collectChunks( + new CopilotProvider().sendQuery('hi', workDir, undefined, { model: 'gpt-5' }) + ); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + expect(cfg.mcpServers).toBeUndefined(); + }); + + test('loads MCP JSON and assigns to SessionConfig.mcpServers', async () => { + const mcpPath = join(workDir, 'mcp.json'); + writeFileSync( + mcpPath, + JSON.stringify({ + 'example-server': { + type: 'local', + command: 'node', + args: ['server.js'], + tools: ['*'], + }, + }) + ); + + await collectChunks( + new CopilotProvider().sendQuery('hi', workDir, undefined, { + model: 'gpt-5', + nodeConfig: { mcp: 'mcp.json' }, + }) + ); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + expect(cfg.mcpServers).toEqual({ + 'example-server': { + type: 'local', + command: 'node', + args: ['server.js'], + tools: ['*'], + }, + }); + }); + + test('expands env vars and warns on missing ones', async () => { + process.env.COPILOT_MCP_TEST_TOKEN = 'secret-value'; + const mcpPath = join(workDir, 'mcp.json'); + writeFileSync( + mcpPath, + JSON.stringify({ + 'env-server': { + type: 'local', + command: 'node', + args: ['server.js'], + tools: ['*'], + env: { + SET_VAR: '$COPILOT_MCP_TEST_TOKEN', + MISSING_VAR: '$COPILOT_MCP_NOT_DEFINED', + }, + }, + }) + ); + + const chunks = await collectChunks( + new CopilotProvider().sendQuery('hi', workDir, undefined, { + model: 'gpt-5', + nodeConfig: { mcp: 'mcp.json' }, + }) + ); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + const servers = cfg.mcpServers as 
Record }>; + expect(servers['env-server']?.env?.SET_VAR).toBe('secret-value'); + expect(servers['env-server']?.env?.MISSING_VAR).toBe(''); + + expect(chunks).toContainEqual( + expect.objectContaining({ + type: 'system', + content: expect.stringContaining('COPILOT_MCP_NOT_DEFINED'), + }) + ); + }); + + test('propagates loadMcpConfig errors (missing file)', async () => { + let caught: Error | undefined; + try { + await collectChunks( + new CopilotProvider().sendQuery('hi', workDir, undefined, { + model: 'gpt-5', + nodeConfig: { mcp: 'does-not-exist.json' }, + }) + ); + } catch (err) { + caught = err as Error; + } + expect(caught?.message).toContain('MCP config file not found'); + }); +}); diff --git a/packages/providers/src/community/copilot/provider-hardening.test.ts b/packages/providers/src/community/copilot/provider-hardening.test.ts new file mode 100644 index 0000000000..65d5f12a79 --- /dev/null +++ b/packages/providers/src/community/copilot/provider-hardening.test.ts @@ -0,0 +1,196 @@ +import { beforeEach, describe, expect, mock, test } from 'bun:test'; + +const mockLogger = { + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(function (this: unknown) { + return this; + }), + bindings: mock(() => ({ module: 'test' })), + isLevelEnabled: mock(() => true), + level: 'info', +}; + +mock.module('@archon/paths', () => ({ + createLogger: () => mockLogger, +})); + +mock.module('./binary-resolver', () => ({ + resolveCopilotCliPath: async () => '/usr/local/bin/copilot', +})); + +type SessionHandler = (event: Record) => void; + +let registeredHandlers: Record = {}; +let scriptedFinalMessage: { data: { content: string; messageId: string } } | undefined; +let sendAndWaitImpl: (() => Promise) | undefined; +let disconnectImpl: () => Promise = async () => undefined; +let stopImpl: () => Promise = async () => []; + +const 
mockSendAndWait = mock(async () => { + if (sendAndWaitImpl) return await sendAndWaitImpl(); + return scriptedFinalMessage; +}); +const mockDisconnect = mock(async () => disconnectImpl()); +const mockStop = mock(async () => stopImpl()); + +const mockSession = { + sessionId: 'copilot-session-hardening', + on: mock((eventType: string, handler: SessionHandler) => { + registeredHandlers[eventType] ??= []; + registeredHandlers[eventType].push(handler); + return () => undefined; + }), + sendAndWait: mockSendAndWait, + abort: mock(async () => undefined), + disconnect: mockDisconnect, +}; + +const capturedSessionConfigs: Array> = []; +const mockCreateSession = mock(async (config: Record) => { + capturedSessionConfigs.push(config); + return mockSession; +}); + +mock.module('@github/copilot-sdk', () => ({ + approveAll: () => ({ kind: 'approved' }), + CopilotClient: class MockCopilotClient { + createSession = mockCreateSession; + resumeSession = mock(async () => mockSession); + stop = mockStop; + }, +})); + +import { CopilotProvider } from './provider'; + +function emit(eventType: string, data: Record): void { + for (const handler of registeredHandlers[eventType] ?? 
[]) { + handler({ + id: crypto.randomUUID(), + timestamp: new Date().toISOString(), + parentId: null, + type: eventType, + data, + }); + } +} + +async function collect( + generator: AsyncGenerator +): Promise<{ chunks: unknown[]; error?: Error }> { + const chunks: unknown[] = []; + try { + for await (const chunk of generator) chunks.push(chunk); + return { chunks }; + } catch (error) { + return { chunks, error: error as Error }; + } +} + +describe('CopilotProvider hardening', () => { + beforeEach(() => { + registeredHandlers = {}; + scriptedFinalMessage = { data: { content: 'FALLBACK', messageId: 'final' } }; + sendAndWaitImpl = undefined; + disconnectImpl = async (): Promise => undefined; + stopImpl = async (): Promise => []; + capturedSessionConfigs.length = 0; + mockSendAndWait.mockClear(); + mockDisconnect.mockClear(); + mockStop.mockClear(); + mockCreateSession.mockClear(); + }); + + test('rejects early when abortSignal is already aborted', async () => { + const controller = new AbortController(); + controller.abort(); + + const { error } = await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + model: 'gpt-5', + abortSignal: controller.signal, + }) + ); + + expect(error).toBeDefined(); + expect(error?.name).toBe('AbortError'); + // sendAndWait must NOT have been entered + expect(mockSendAndWait).toHaveBeenCalledTimes(0); + }); + + test('trims whitespace from the model before assigning to SessionConfig', async () => { + await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { model: ' gpt-5-mini ' }) + ); + + expect(capturedSessionConfigs[0]?.model).toBe('gpt-5-mini'); + }); + + test('falls back to assistantConfig.model and trims that too', async () => { + await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + assistantConfig: { model: ' gpt-5 ' }, + }) + ); + + expect(capturedSessionConfigs[0]?.model).toBe('gpt-5'); + }); + + test('does NOT emit a spurious session-error warning when fallback 
assistant content was delivered', async () => { + // Simulate: no streaming deltas, sendAndWait emits session.error and + // still returns a final assistant message. + sendAndWaitImpl = async () => { + emit('session.error', { errorType: 'transient', message: 'some transient error' }); + return scriptedFinalMessage; + }; + + const { chunks, error } = await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { model: 'gpt-5' }) + ); + + expect(error).toBeUndefined(); + expect(chunks).toContainEqual({ type: 'assistant', content: 'FALLBACK' }); + // The session-error should NOT produce a system warning when fallback + // content was delivered — this is the bug Devin flagged. + expect(chunks).not.toContainEqual( + expect.objectContaining({ + type: 'system', + content: expect.stringContaining('some transient error'), + }) + ); + }); + + test('cleanup failure in disconnect does not mask the primary result', async () => { + disconnectImpl = async (): Promise => { + throw new Error('disconnect blew up'); + }; + + const { chunks, error } = await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { model: 'gpt-5' }) + ); + + expect(error).toBeUndefined(); + expect(chunks).toContainEqual(expect.objectContaining({ type: 'result' })); + }); + + test('cleanup failure in client.stop does not mask the friendly primary error', async () => { + sendAndWaitImpl = async () => { + throw new Error('Model not available'); + }; + stopImpl = async (): Promise => { + throw new Error('client.stop blew up'); + }; + + const { error } = await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { model: 'gpt-5' }) + ); + + // The friendly model-access error must survive the stop() throw. 
+ expect(error?.message).toContain('Copilot model access error'); + expect(error?.message).not.toContain('client.stop blew up'); + }); +}); diff --git a/packages/providers/src/community/copilot/provider-lazy-load.test.ts b/packages/providers/src/community/copilot/provider-lazy-load.test.ts new file mode 100644 index 0000000000..a1beb4f289 --- /dev/null +++ b/packages/providers/src/community/copilot/provider-lazy-load.test.ts @@ -0,0 +1,47 @@ +/** + * Regression test: Copilot SDK must not load at module-import time. + * + * Pi's `@mariozechner/pi-coding-agent/dist/config.js` runs file I/O at + * top-level, which crashes compiled Archon binaries at startup with ENOENT + * (#1355, v0.3.7). `@github/copilot-sdk` has not been audited for the same + * pattern; treating it as if it might do the same is the conservative + * default. Any static value-import from `@github/copilot-sdk` reachable from + * `registerCommunityProviders()` defeats this guarantee. + * + * Detection strategy: replace `@github/copilot-sdk` with a `mock.module` + * factory that flips a boolean the first time something resolves it. Walk the + * same registration path the CLI and server take and assert the flag did not + * tip. A throwing factory would abort the failing import before the `expect` + * runs, producing a crash at resolution time with no assertion context — the + * counter keeps failures actionable. + * + * Runs in its own `bun test` invocation because Bun's `mock.module` is + * process-wide and would poison `provider.test.ts`, which installs a benign + * stub for the same module (see CLAUDE.md on test isolation). 
+ */ +import { expect, mock, test } from 'bun:test'; + +let copilotSdkLoaded = false; + +mock.module('@github/copilot-sdk', () => { + copilotSdkLoaded = true; + return {}; +}); + +test('registering and instantiating the Copilot provider does not eagerly load the Copilot SDK', async () => { + const { clearRegistry, getAgentProvider, registerCommunityProviders } = + await import('../../registry'); + + clearRegistry(); + registerCommunityProviders(); + + const provider = getAgentProvider('copilot'); + expect(provider.getType()).toBe('copilot'); + expect(provider.getCapabilities()).toBeDefined(); + + // If this fails, someone reintroduced a static (non-type) import from + // `@github/copilot-sdk` somewhere in the module chain reachable from + // `registerCommunityProviders()`. Fix by moving the value import inside + // `CopilotProvider.sendQuery()`'s dynamic-import block. + expect(copilotSdkLoaded).toBe(false); +}); diff --git a/packages/providers/src/community/copilot/provider.test.ts b/packages/providers/src/community/copilot/provider.test.ts new file mode 100644 index 0000000000..59a872dfa8 --- /dev/null +++ b/packages/providers/src/community/copilot/provider.test.ts @@ -0,0 +1,266 @@ +import { beforeEach, describe, expect, mock, test } from 'bun:test'; + +const mockLogger = { + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(function (this: unknown) { + return this; + }), + bindings: mock(() => ({ module: 'test' })), + isLevelEnabled: mock(() => true), + level: 'info', +}; + +mock.module('@archon/paths', () => ({ + createLogger: () => mockLogger, +})); + +const mockResolveCopilotCliPath = mock(async () => '/usr/local/bin/copilot'); +mock.module('./binary-resolver', () => ({ + resolveCopilotCliPath: mockResolveCopilotCliPath, +})); + +type SessionHandler = (event: Record) => void; + +let registeredHandlers: Record 
= {};
+let scriptedFinalMessage: { data: { content: string; messageId: string } } | undefined;
+let sendAndWaitImpl:
+  | (() => Promise<{ data: { content: string; messageId: string } } | undefined>)
+  | undefined;
+
+const mockAbort = mock(async () => undefined);
+const mockDisconnect = mock(async () => undefined);
+const mockSendAndWait = mock(async () => {
+  if (sendAndWaitImpl) return await sendAndWaitImpl();
+  return scriptedFinalMessage;
+});
+
+const mockSession = {
+  sessionId: 'copilot-session-123',
+  on: mock((eventType: string, handler: SessionHandler) => {
+    registeredHandlers[eventType] ??= [];
+    registeredHandlers[eventType].push(handler);
+    return () => {
+      registeredHandlers[eventType] = (registeredHandlers[eventType] ?? []).filter(
+        h => h !== handler
+      );
+    };
+  }),
+  sendAndWait: mockSendAndWait,
+  abort: mockAbort,
+  disconnect: mockDisconnect,
+};
+
+const createdClients: Array<Record<string, unknown>> = [];
+const mockCreateSession = mock(async () => mockSession);
+const mockResumeSession = mock(async () => mockSession);
+const mockStop = mock(async () => []);
+
+mock.module('@github/copilot-sdk', () => ({
+  approveAll: () => ({ kind: 'approved' }),
+  CopilotClient: class MockCopilotClient {
+    constructor(options: Record<string, unknown>) {
+      createdClients.push(options);
+    }
+    createSession = mockCreateSession;
+    resumeSession = mockResumeSession;
+    stop = mockStop;
+  },
+}));
+
+import { CopilotProvider } from './provider';
+import { COPILOT_CAPABILITIES } from './capabilities';
+
+function emit(eventType: string, data: Record<string, unknown>): void {
+  for (const handler of registeredHandlers[eventType] ??
[]) { + handler({ + id: crypto.randomUUID(), + timestamp: new Date().toISOString(), + parentId: null, + type: eventType, + data, + }); + } +} + +async function collect( + generator: AsyncGenerator +): Promise<{ chunks: unknown[]; error?: Error }> { + const chunks: unknown[] = []; + try { + for await (const chunk of generator) chunks.push(chunk); + return { chunks }; + } catch (error) { + return { chunks, error: error as Error }; + } +} + +describe('CopilotProvider', () => { + beforeEach(() => { + registeredHandlers = {}; + scriptedFinalMessage = { data: { content: 'COPILOT_OK', messageId: 'msg-final' } }; + sendAndWaitImpl = undefined; + createdClients.length = 0; + mockResolveCopilotCliPath.mockClear(); + mockCreateSession.mockClear(); + mockResumeSession.mockClear(); + mockStop.mockClear(); + mockSendAndWait.mockClear(); + mockAbort.mockClear(); + mockDisconnect.mockClear(); + }); + + test('reports provider type and capabilities', () => { + const provider = new CopilotProvider(); + expect(provider.getType()).toBe('copilot'); + expect(provider.getCapabilities()).toEqual(COPILOT_CAPABILITIES); + }); + + test('streams assistant, thinking, tool, and result chunks', async () => { + sendAndWaitImpl = async () => { + emit('assistant.reasoning_delta', { reasoningId: 'r1', deltaContent: 'thinking...' 
}); + emit('assistant.message_delta', { messageId: 'm1', deltaContent: 'hello ' }); + emit('tool.execution_start', { + toolCallId: 'tool-1', + toolName: 'read_file', + arguments: { path: 'README.md' }, + }); + emit('tool.execution_complete', { + toolCallId: 'tool-1', + success: true, + result: { content: 'ok', detailedContent: 'full output' }, + }); + emit('assistant.usage', { model: 'gpt-5', inputTokens: 11, outputTokens: 7, cost: 1 }); + return scriptedFinalMessage; + }; + + const { chunks, error } = await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + model: 'gpt-5', + env: { GH_TOKEN: 'token-123', PROJECT_ONLY: 'yes' }, + }) + ); + + expect(error).toBeUndefined(); + expect(chunks).toContainEqual({ type: 'thinking', content: 'thinking...' }); + expect(chunks).toContainEqual({ type: 'assistant', content: 'hello ' }); + expect(chunks).toContainEqual({ + type: 'tool', + toolName: 'read_file', + toolInput: { path: 'README.md' }, + toolCallId: 'tool-1', + }); + expect(chunks).toContainEqual({ + type: 'tool_result', + toolName: 'read_file', + toolOutput: 'full output', + toolCallId: 'tool-1', + }); + expect(chunks).toContainEqual({ + type: 'result', + sessionId: 'copilot-session-123', + tokens: { input: 11, output: 7, total: 18, cost: 1 }, + cost: 1, + }); + + expect(createdClients[0]).toMatchObject({ + cliPath: '/usr/local/bin/copilot', + cwd: '/repo', + githubToken: 'token-123', + }); + expect((createdClients[0].env as Record).PROJECT_ONLY).toBe('yes'); + }); + + test('falls back to final assistant message when no deltas were streamed', async () => { + const { chunks, error } = await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { model: 'gpt-5' }) + ); + + expect(error).toBeUndefined(); + expect(chunks).toContainEqual({ type: 'assistant', content: 'COPILOT_OK' }); + }); + + test('uses resumeSession when resumeSessionId is provided', async () => { + const { error } = await collect( + new 
CopilotProvider().sendQuery('resume me', '/repo', 'resume-123', { model: 'gpt-5' }) + ); + + expect(error).toBeUndefined(); + expect(mockResumeSession).toHaveBeenCalledTimes(1); + expect(mockCreateSession).toHaveBeenCalledTimes(0); + }); + + test('surfaces reasoning warning for unsupported thinking config shapes', async () => { + const { chunks, error } = await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + model: 'gpt-5', + nodeConfig: { thinking: { type: 'enabled', budget_tokens: 1024 } }, + }) + ); + + expect(error).toBeUndefined(); + expect(chunks).toContainEqual({ + type: 'system', + content: + '⚠️ Copilot ignored `thinking` (object form is Claude-specific). Use `effort: low|medium|high|max` instead.', + }); + }); + + test('returns a friendly auth error', async () => { + sendAndWaitImpl = async () => { + emit('session.error', { errorType: 'authentication', message: 'not authenticated' }); + throw new Error('not authenticated'); + }; + + const { error } = await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { model: 'gpt-5' }) + ); + + expect(error?.message).toContain('Copilot authentication failed'); + expect(error?.message).toContain('copilot login'); + }); + + describe('useLoggedInUser precedence', () => { + test('defaults to true when neither env token nor explicit config is set', async () => { + await collect(new CopilotProvider().sendQuery('hi', '/repo', undefined, { model: 'gpt-5' })); + expect(createdClients[0]?.useLoggedInUser).toBe(true); + }); + + test('defaults to false when an env token is present and config is silent', async () => { + await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + model: 'gpt-5', + env: { GH_TOKEN: 'env-token' }, + }) + ); + expect(createdClients[0]?.useLoggedInUser).toBe(false); + }); + + test('honors explicit useLoggedInUser:true even when an env token is present', async () => { + // Real-world case: user has GH_TOKEN exported for a different gh CLI 
+ // account and wants Copilot to use their logged-in subscription instead. + await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + model: 'gpt-5', + env: { GH_TOKEN: 'env-token' }, + assistantConfig: { useLoggedInUser: true }, + }) + ); + expect(createdClients[0]?.useLoggedInUser).toBe(true); + }); + + test('honors explicit useLoggedInUser:false when no env token is present', async () => { + await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + model: 'gpt-5', + assistantConfig: { useLoggedInUser: false }, + }) + ); + expect(createdClients[0]?.useLoggedInUser).toBe(false); + }); + }); +}); diff --git a/packages/providers/src/community/copilot/provider.ts b/packages/providers/src/community/copilot/provider.ts new file mode 100644 index 0000000000..9c8a3b3f00 --- /dev/null +++ b/packages/providers/src/community/copilot/provider.ts @@ -0,0 +1,633 @@ +// IMPORTANT: Do NOT add static `import { ... } from '@github/copilot-sdk'` here +// (only `import type` is safe). The Copilot SDK has not been audited for +// module-load side effects and the Pi precedent (#1355, v0.3.7) shows compiled +// Archon binaries crash at startup if any reachable SDK module reads files at +// import time. The `CopilotClient` constructor and `approveAll` value binding +// are dynamic-imported inside `sendQuery()` below so the SDK only loads when a +// Copilot workflow is actually invoked. The `provider-lazy-load.test.ts` +// regression test locks this invariant in. 
+import type {
+  AssistantMessageEvent,
+  CopilotClient,
+  CustomAgentConfig,
+  MCPServerConfig,
+  SessionConfig,
+  SessionEvent,
+} from '@github/copilot-sdk';
+import { createLogger } from '@archon/paths';
+
+import type { IAgentProvider, MessageChunk, SendQueryOptions } from '../../types';
+import { loadMcpConfig } from '../../claude/provider';
+import { resolveSkillDirectories } from '../../shared/skills';
+import {
+  augmentPromptForJsonSchema,
+  tryParseStructuredOutput,
+} from '../../shared/structured-output';
+import { COPILOT_CAPABILITIES } from './capabilities';
+import { resolveCopilotCliPath } from './binary-resolver';
+import { parseCopilotConfig, type CopilotProviderDefaults } from './config';
+
+let cachedLog: ReturnType<typeof createLogger> | undefined;
+function getLog(): ReturnType<typeof createLogger> {
+  if (!cachedLog) cachedLog = createLogger('provider.copilot');
+  return cachedLog;
+}
+
+const SEND_AND_WAIT_TIMEOUT_MS = 24 * 60 * 60 * 1000;
+
+const AUTH_ENV_KEYS = ['COPILOT_GITHUB_TOKEN', 'GH_TOKEN', 'GITHUB_TOKEN'] as const;
+type CopilotReasoningEffort = 'low' | 'medium' | 'high' | 'xhigh';
+
+/** Structured provider warning collected during translation and flushed as a system chunk. */
+interface ProviderWarning {
+  code: string;
+  message: string;
+}
+
+function buildCopilotEnv(requestEnv?: Record<string, string>): Record<string, string> {
+  const baseEnv = Object.fromEntries(
+    Object.entries(process.env).filter((entry): entry is [string, string] => entry[1] !== undefined)
+  );
+  return { ...baseEnv, ...(requestEnv ??
{}) }; +} + +function resolveGitHubToken(env: Record): string | undefined { + for (const key of AUTH_ENV_KEYS) { + const value = env[key]; + if (value) return value; + } + return undefined; +} + +function normalizeReasoning(value: unknown): CopilotReasoningEffort | undefined { + if (value === 'max') return 'xhigh'; + if (value === 'low' || value === 'medium' || value === 'high' || value === 'xhigh') return value; + return undefined; +} + +function resolveCopilotReasoning(nodeConfig?: SendQueryOptions['nodeConfig']): { + effort: CopilotReasoningEffort | undefined; + warning?: string; +} { + if (!nodeConfig) return { effort: undefined }; + + const rawThinking = nodeConfig.thinking; + const rawEffort = nodeConfig.effort; + + if (rawThinking === 'off' || rawEffort === 'off') return { effort: undefined }; + + const fromThinking = normalizeReasoning(rawThinking); + if (fromThinking) return { effort: fromThinking }; + + const fromEffort = normalizeReasoning(rawEffort); + if (fromEffort) return { effort: fromEffort }; + + if (rawThinking !== undefined && rawThinking !== null && typeof rawThinking === 'object') { + return { + effort: undefined, + warning: + 'Copilot ignored `thinking` (object form is Claude-specific). Use `effort: low|medium|high|max` instead.', + }; + } + + if (typeof rawThinking === 'string' || typeof rawEffort === 'string') { + const offender = typeof rawThinking === 'string' ? rawThinking : rawEffort; + return { + effort: undefined, + warning: `Copilot ignored unknown reasoning level '${String(offender)}'. Valid: low, medium, high, xhigh, max, off.`, + }; + } + + return { effort: undefined }; +} + +function buildSystemMessage(requestOptions?: SendQueryOptions): { content: string } | undefined { + const systemPrompt = requestOptions?.systemPrompt ?? 
requestOptions?.nodeConfig?.systemPrompt; + if (!systemPrompt) return undefined; + return { content: systemPrompt }; +} + +/** + * Translate Archon's per-node `allowed_tools` / `denied_tools` to Copilot's + * `availableTools` / `excludedTools`. Copilot's spec: `availableTools` takes + * precedence over `excludedTools`. We pass both through when present and let + * the SDK enforce precedence. + */ +function applyToolRestrictions( + sessionConfig: SessionConfig, + nodeConfig: SendQueryOptions['nodeConfig'] +): void { + if (!nodeConfig) return; + if (nodeConfig.allowed_tools !== undefined) { + sessionConfig.availableTools = nodeConfig.allowed_tools; + } + if (nodeConfig.denied_tools !== undefined) { + sessionConfig.excludedTools = nodeConfig.denied_tools; + } +} + +/** + * Translate Archon's `nodeConfig.mcp` (JSON-file path) to Copilot's + * `SessionConfig.mcpServers`. Reuses Claude's `loadMcpConfig` so env-var + * expansion and missing-var detection behave consistently across providers. + */ +async function applyMcpServers( + sessionConfig: SessionConfig, + nodeConfig: SendQueryOptions['nodeConfig'], + cwd: string, + warnings: ProviderWarning[] +): Promise { + const mcpPath = nodeConfig?.mcp; + if (typeof mcpPath !== 'string' || mcpPath.length === 0) return; + + const { servers, serverNames, missingVars } = await loadMcpConfig(mcpPath, cwd); + + if (missingVars.length > 0) { + warnings.push({ + code: 'copilot.mcp_env_vars_missing', + message: `Copilot MCP config references undefined env vars: ${missingVars.join(', ')}. Servers using them may fail at runtime.`, + }); + } + + sessionConfig.mcpServers = servers as Record; + getLog().info({ serverNames, missingVars }, 'copilot.mcp_loaded'); +} + +/** + * Translate Archon's `nodeConfig.skills` (string names) to Copilot's + * `SessionConfig.skillDirectories` (absolute paths). Unresolved names become + * a single system warning chunk so the user notices the typo/missing skill. 
+ */ +function applySkills( + sessionConfig: SessionConfig, + nodeConfig: SendQueryOptions['nodeConfig'], + cwd: string, + warnings: ProviderWarning[] +): void { + if (!nodeConfig?.skills || nodeConfig.skills.length === 0) return; + + const { paths, missing } = resolveSkillDirectories(cwd, nodeConfig.skills); + + if (missing.length > 0) { + warnings.push({ + code: 'copilot.skills_missing', + message: `Copilot ignored missing skills: ${missing.join(', ')}. Expected a directory with SKILL.md under .agents/skills/ or .claude/skills/ (project or home).`, + }); + } + + if (paths.length > 0) { + sessionConfig.skillDirectories = paths; + } + getLog().info({ resolved: paths.length, missing }, 'copilot.skills_resolved'); +} + +/** + * Translate Archon's `nodeConfig.agents` (Record) to + * Copilot's `SessionConfig.customAgents`. Mapping is deliberately narrow — + * only the fields Copilot's `CustomAgentConfig` supports pass through: + * + * name ← map key + * description ← agent.description + * prompt ← agent.prompt + * tools ← agent.tools (allowlist; Copilot has no per-agent denylist) + * + * Archon agent fields Copilot cannot represent (`model`, `disallowedTools`, + * `skills`, `maxTurns`) surface as one consolidated warning per agent. + * + * We do NOT set `SessionConfig.agent` — Archon's workflow model invokes + * sub-agents via the Task tool, not by switching active agent at session + * start. 
+ */ +function applyAgents( + sessionConfig: SessionConfig, + nodeConfig: SendQueryOptions['nodeConfig'], + warnings: ProviderWarning[] +): void { + const agents = nodeConfig?.agents; + if (!agents) return; + const entries = Object.entries(agents); + if (entries.length === 0) return; + + const customAgents: CustomAgentConfig[] = entries.map(([name, def]) => { + const ignored: string[] = []; + if (def.model !== undefined) ignored.push('model'); + if (def.disallowedTools !== undefined) ignored.push('disallowedTools'); + if (def.skills !== undefined) ignored.push('skills'); + if (def.maxTurns !== undefined) ignored.push('maxTurns'); + + if (ignored.length > 0) { + warnings.push({ + code: 'copilot.agent_fields_ignored', + message: `Copilot agent '${name}' ignored unsupported fields: ${ignored.join(', ')}. Copilot supports description, prompt, tools (allowlist) only.`, + }); + } + + return { + name, + description: def.description, + prompt: def.prompt, + ...(def.tools !== undefined ? { tools: def.tools } : {}), + }; + }); + + sessionConfig.customAgents = customAgents; + getLog().info( + { count: customAgents.length, names: customAgents.map(a => a.name) }, + 'copilot.agents_registered' + ); +} + +/** + * Single construction site for the Copilot SessionConfig. Each subsequent + * workflow-parity phase adds one `applyX(sessionConfig, ..., warnings)` call + * below this function — keep business logic here straight-through. 
+ */ +async function buildSessionConfig( + copilotConfig: CopilotProviderDefaults, + requestOptions: SendQueryOptions | undefined, + cwd: string, + warnings: ProviderWarning[], + approveAll: SessionConfig['onPermissionRequest'] +): Promise { + const reasoning = resolveCopilotReasoning(requestOptions?.nodeConfig); + if (reasoning.warning) { + warnings.push({ code: 'copilot.reasoning_ignored', message: reasoning.warning }); + } + + const requestedModel = requestOptions?.model?.trim() || undefined; + const defaultModel = copilotConfig.model?.trim() || undefined; + + const sessionConfig: SessionConfig = { + model: requestedModel ?? defaultModel, + reasoningEffort: reasoning.effort, + workingDirectory: cwd, + configDir: copilotConfig.configDir, + streaming: true, + systemMessage: buildSystemMessage(requestOptions), + enableConfigDiscovery: copilotConfig.enableConfigDiscovery ?? false, + onPermissionRequest: approveAll, + }; + + applyToolRestrictions(sessionConfig, requestOptions?.nodeConfig); + await applyMcpServers(sessionConfig, requestOptions?.nodeConfig, cwd, warnings); + applySkills(sessionConfig, requestOptions?.nodeConfig, cwd, warnings); + applyAgents(sessionConfig, requestOptions?.nodeConfig, warnings); + + return sessionConfig; +} + +function isModelAccessError(errorMessage: string): boolean { + const normalized = errorMessage.toLowerCase(); + const hasModel = normalized.includes('model'); + const hasAvailabilitySignal = + normalized.includes('not available') || + normalized.includes('not found') || + normalized.includes('unsupported'); + return hasModel && hasAvailabilitySignal; +} + +function buildFriendlyCopilotError(error: unknown, lastSessionError?: string): Error { + // A generic `sendAndWait` rejection often hides the actionable detail in + // `session.error` (auth/model-access). Combine both for classification and + // surface both in the user-visible message. + const thrownMessage = error instanceof Error && error.message ? 
error.message : String(error);
+  const combined = [thrownMessage, lastSessionError]
+    .filter((m): m is string => Boolean(m))
+    .join('\n');
+  const rawMessage = combined || 'Unknown error';
+
+  if (isModelAccessError(rawMessage)) {
+    return new Error(
+      `Copilot model access error: ${rawMessage}\n\n` +
+        'Try a different model in the workflow node or set assistants.copilot.model in .archon/config.yaml.'
+    );
+  }
+
+  const normalized = rawMessage.toLowerCase();
+  if (
+    normalized.includes('auth') ||
+    normalized.includes('login') ||
+    normalized.includes('unauthorized') ||
+    normalized.includes('forbidden')
+  ) {
+    return new Error(
+      `Copilot authentication failed: ${rawMessage}\n\n` +
+        'Run `copilot login`, or provide COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN.'
+    );
+  }
+
+  return error instanceof Error && !lastSessionError ? error : new Error(rawMessage);
+}
+
+class AsyncChunkQueue<T> {
+  private values: T[] = [];
+  private resolvers: ((result: IteratorResult<T>) => void)[] = [];
+  private closed = false;
+
+  push(value: T): void {
+    if (this.closed) return;
+    const resolver = this.resolvers.shift();
+    if (resolver) {
+      resolver({ value, done: false });
+      return;
+    }
+    this.values.push(value);
+  }
+
+  close(): void {
+    if (this.closed) return;
+    this.closed = true;
+    for (const resolver of this.resolvers.splice(0)) {
+      resolver({ value: undefined, done: true });
+    }
+  }
+
+  async next(): Promise<IteratorResult<T>> {
+    if (this.values.length > 0) {
+      return { value: this.values.shift() as T, done: false };
+    }
+    if (this.closed) {
+      return { value: undefined, done: true };
+    }
+    return await new Promise<IteratorResult<T>>(resolve => {
+      this.resolvers.push(resolve);
+    });
+  }
+
+  [Symbol.asyncIterator](): AsyncIterator<T> {
+    return { next: () => this.next() };
+  }
+}
+
+interface UsageAccumulator {
+  input: number;
+  output: number;
+  cost?: number;
+}
+
+function addUsage(
+  acc: UsageAccumulator,
+  event: Extract<SessionEvent, { type: 'assistant.usage' }> // NOTE(review): type args reconstructed from the emit() event shape — confirm against the SDK's SessionEvent discriminant
+): void {
+  acc.input += event.data.inputTokens ??
0; + acc.output += event.data.outputTokens ?? 0; + if (typeof event.data.cost === 'number') { + acc.cost = (acc.cost ?? 0) + event.data.cost; + } +} + +export class CopilotProvider implements IAgentProvider { + getType(): string { + return 'copilot'; + } + + getCapabilities(): typeof COPILOT_CAPABILITIES { + return COPILOT_CAPABILITIES; + } + + async *sendQuery( + prompt: string, + cwd: string, + resumeSessionId?: string, + requestOptions?: SendQueryOptions + ): AsyncGenerator { + const queue = new AsyncChunkQueue(); + + let runError: Error | undefined; + // Hoisted so the outer generator can abort the SDK run if the caller + // stops iterating before the queue closes (e.g. early `break` or thrown + // error in the consumer). Without this, `sendAndWait` would keep running + // up to the 24h ceiling. + let activeSession: Awaited> | undefined; + let runFinished = false; + (async (): Promise => { + // Lazy-load the SDK at first invocation. See the import-block header above + // for why this matters in compiled Archon binaries. Module-namespace + // binding (rather than destructuring) keeps the constructor's PascalCase + // shape at the call site without fighting the camelCase naming-convention + // lint rule applied to local variables. + const copilotSdk = await import('@github/copilot-sdk'); + + const assistantConfig = requestOptions?.assistantConfig ?? 
{}; + const copilotConfig = parseCopilotConfig(assistantConfig); + const mergedEnv = buildCopilotEnv(requestOptions?.env); + const githubToken = resolveGitHubToken(mergedEnv); + const cliPath = await resolveCopilotCliPath(copilotConfig.copilotCliPath); + + const warnings: ProviderWarning[] = []; + const sessionConfig = await buildSessionConfig( + copilotConfig, + requestOptions, + cwd, + warnings, + copilotSdk.approveAll + ); + + for (const w of warnings) { + queue.push({ type: 'system', content: `⚠️ ${w.message}` }); + } + + const client = new copilotSdk.CopilotClient({ + cliPath, + cwd, + env: mergedEnv, + githubToken, + // Precedence: explicit config wins over an env token. Default behavior + // is unchanged when the user sets nothing — env token present → + // useLoggedInUser:false, otherwise true. But if the user explicitly + // sets `useLoggedInUser: true` (e.g. they have GH_TOKEN exported for a + // different gh CLI account and want Copilot to use the logged-in + // session), that intent is now honored instead of being silently + // overridden. + useLoggedInUser: copilotConfig.useLoggedInUser ?? !githubToken, + logLevel: copilotConfig.logLevel, + }); + + let session: Awaited> | undefined; + let lastSessionError: string | undefined; + const streamedMessageIds = new Set(); + const streamedReasoningIds = new Set(); + const toolNames = new Map(); + const usage: UsageAccumulator = { input: 0, output: 0 }; + let sawAssistantContent = false; + + // Best-effort structured output: Copilot has no native JSON-mode, so we + // augment the prompt with the schema and parse the accumulated assistant + // transcript at the end. Parse failure → leave `structuredOutput` unset + // and let the dag-executor surface its existing missing-output warning. + const outputFormat = requestOptions?.outputFormat; + const wantsStructured = outputFormat?.type === 'json_schema'; + const effectivePrompt = wantsStructured + ? 
augmentPromptForJsonSchema(prompt, outputFormat.schema) + : prompt; + let assistantBuffer = ''; + + try { + session = resumeSessionId + ? await client.resumeSession(resumeSessionId, sessionConfig) + : await client.createSession(sessionConfig); + activeSession = session; + + session.on('assistant.reasoning_delta', event => { + streamedReasoningIds.add(event.data.reasoningId); + if (event.data.deltaContent) { + queue.push({ type: 'thinking', content: event.data.deltaContent }); + } + }); + + session.on('assistant.reasoning', event => { + if (streamedReasoningIds.has(event.data.reasoningId)) return; + if (event.data.content) { + queue.push({ type: 'thinking', content: event.data.content }); + } + }); + + session.on('assistant.message_delta', event => { + streamedMessageIds.add(event.data.messageId); + if (event.data.deltaContent) { + sawAssistantContent = true; + assistantBuffer += event.data.deltaContent; + queue.push({ type: 'assistant', content: event.data.deltaContent }); + } + }); + + session.on('assistant.message', event => { + if (streamedMessageIds.has(event.data.messageId)) return; + if (event.data.content) { + sawAssistantContent = true; + assistantBuffer += event.data.content; + queue.push({ type: 'assistant', content: event.data.content }); + } + }); + + session.on('assistant.usage', event => { + addUsage(usage, event); + }); + + session.on('tool.execution_start', event => { + toolNames.set(event.data.toolCallId, event.data.toolName); + queue.push({ + type: 'tool', + toolName: event.data.toolName, + toolInput: event.data.arguments, + toolCallId: event.data.toolCallId, + }); + }); + + session.on('tool.execution_complete', event => { + queue.push({ + type: 'tool_result', + toolName: toolNames.get(event.data.toolCallId) ?? 'unknown', + toolOutput: event.data.result?.detailedContent ?? event.data.result?.content ?? 
'', + toolCallId: event.data.toolCallId, + }); + }); + + session.on('session.error', event => { + lastSessionError = event.data.message; + }); + + const abortSignal = requestOptions?.abortSignal; + // `addEventListener('abort', ...)` is a no-op on an already-aborted + // signal, so short-circuit before handing the 24-hour sendAndWait + // path a signal that will never fire. + if (abortSignal?.aborted) { + throw new DOMException('Copilot sendQuery aborted before start', 'AbortError'); + } + const onAbort = (): void => { + if (!session) return; + void session.abort().catch(err => { + getLog().warn({ err }, 'copilot.abort_failed'); + }); + }; + abortSignal?.addEventListener('abort', onAbort, { once: true }); + + let finalMessage: AssistantMessageEvent | undefined; + try { + finalMessage = await session.sendAndWait( + { prompt: effectivePrompt }, + SEND_AND_WAIT_TIMEOUT_MS + ); + } finally { + abortSignal?.removeEventListener('abort', onAbort); + } + + if (!sawAssistantContent && finalMessage?.data.content) { + sawAssistantContent = true; + assistantBuffer += finalMessage.data.content; + queue.push({ type: 'assistant', content: finalMessage.data.content }); + } + + if (!sawAssistantContent && lastSessionError) { + queue.push({ type: 'system', content: `⚠️ ${lastSessionError}` }); + } + + const structuredOutput = wantsStructured + ? tryParseStructuredOutput(assistantBuffer) + : undefined; + + queue.push({ + type: 'result', + sessionId: session.sessionId, + tokens: + usage.input > 0 || usage.output > 0 || usage.cost !== undefined + ? { + input: usage.input, + output: usage.output, + total: usage.input + usage.output, + cost: usage.cost, + } + : undefined, + cost: usage.cost, + ...(structuredOutput !== undefined ? { structuredOutput } : {}), + }); + } catch (error) { + throw buildFriendlyCopilotError(error, lastSessionError); + } finally { + // Cleanup must not throw — doing so would replace the primary + // result/error from the try block above. Log and swallow. 
+ try { + await session?.disconnect(); + } catch (err) { + getLog().warn({ err }, 'copilot.disconnect_failed'); + } + try { + const stopErrors = await client.stop(); + if (stopErrors.length > 0) { + getLog().warn( + { errors: stopErrors.map(err => err.message) }, + 'copilot.client_stop_errors' + ); + } + } catch (err) { + getLog().warn({ err }, 'copilot.client_stop_threw'); + } + } + })() + .catch(error => { + runError = error as Error; + }) + .finally(() => { + runFinished = true; + queue.close(); + }); + + try { + for await (const chunk of queue) { + yield chunk; + } + } finally { + // If the consumer stops iterating before the run finishes (early break, + // thrown error in caller, generator.return()), drain the SDK session so + // we don't keep paying for a sendAndWait that nobody will read. + if (!runFinished && activeSession) { + queue.close(); + void activeSession.abort().catch(err => { + getLog().warn({ err }, 'copilot.abort_failed'); + }); + } + } + + if (runError) throw runError; + } +} diff --git a/packages/providers/src/community/copilot/registration.ts b/packages/providers/src/community/copilot/registration.ts new file mode 100644 index 0000000000..eb8b4f4638 --- /dev/null +++ b/packages/providers/src/community/copilot/registration.ts @@ -0,0 +1,14 @@ +import { isRegisteredProvider, registerProvider } from '../../registry'; +import { COPILOT_CAPABILITIES } from './capabilities'; +import { CopilotProvider } from './provider'; + +export function registerCopilotProvider(): void { + if (isRegisteredProvider('copilot')) return; + registerProvider({ + id: 'copilot', + displayName: 'GitHub Copilot (community)', + factory: () => new CopilotProvider(), + capabilities: COPILOT_CAPABILITIES, + builtIn: false, + }); +} diff --git a/packages/providers/src/community/copilot/skills-translation.test.ts b/packages/providers/src/community/copilot/skills-translation.test.ts new file mode 100644 index 0000000000..e59ef82ac6 --- /dev/null +++ 
b/packages/providers/src/community/copilot/skills-translation.test.ts @@ -0,0 +1,164 @@ +import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test'; +import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +const mockLogger = { + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(function (this: unknown) { + return this; + }), + bindings: mock(() => ({ module: 'test' })), + isLevelEnabled: mock(() => true), + level: 'info', +}; + +mock.module('@archon/paths', () => ({ + createLogger: () => mockLogger, +})); + +mock.module('./binary-resolver', () => ({ + resolveCopilotCliPath: async () => '/usr/local/bin/copilot', +})); + +type SessionHandler = (event: Record) => void; + +let registeredHandlers: Record = {}; +const mockSession = { + sessionId: 'copilot-session-skills', + on: mock((eventType: string, handler: SessionHandler) => { + registeredHandlers[eventType] ??= []; + registeredHandlers[eventType].push(handler); + return () => undefined; + }), + sendAndWait: mock(async () => ({ data: { content: 'ok', messageId: 'm' } })), + abort: mock(async () => undefined), + disconnect: mock(async () => undefined), +}; + +const capturedSessionConfigs: Array> = []; +const mockCreateSession = mock(async (config: Record) => { + capturedSessionConfigs.push(config); + return mockSession; +}); + +mock.module('@github/copilot-sdk', () => ({ + approveAll: () => ({ kind: 'approved' }), + CopilotClient: class MockCopilotClient { + createSession = mockCreateSession; + resumeSession = mock(async () => mockSession); + stop = mock(async () => []); + }, +})); + +import { CopilotProvider } from './provider'; + +async function collectChunks(generator: AsyncGenerator): Promise { + const chunks: unknown[] = []; + for await (const chunk of 
generator) chunks.push(chunk); + return chunks; +} + +let tmpRoot = ''; +let workDir = ''; +let originalHome: string | undefined; + +describe('applySkills', () => { + beforeEach(() => { + registeredHandlers = {}; + capturedSessionConfigs.length = 0; + mockCreateSession.mockClear(); + + tmpRoot = mkdtempSync(join(tmpdir(), 'copilot-skills-')); + workDir = join(tmpRoot, 'project'); + const home = join(tmpRoot, 'home'); + + originalHome = process.env.HOME; + process.env.HOME = home; + + // Stage: + // /.agents/skills/alpha/SKILL.md + // /.claude/skills/beta/SKILL.md + const stage: Array<[string, string]> = [ + [join(workDir, '.agents', 'skills', 'alpha'), 'SKILL.md'], + [join(home, '.claude', 'skills', 'beta'), 'SKILL.md'], + ]; + for (const [dir, file] of stage) { + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, file), '# skill\n'); + } + }); + + afterEach(() => { + if (originalHome === undefined) { + delete process.env.HOME; + } else { + process.env.HOME = originalHome; + } + rmSync(tmpRoot, { recursive: true, force: true }); + }); + + test('omits skillDirectories when nodeConfig.skills is absent', async () => { + await collectChunks( + new CopilotProvider().sendQuery('hi', workDir, undefined, { model: 'gpt-5' }) + ); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + expect(cfg.skillDirectories).toBeUndefined(); + }); + + test('resolves project + home skills to absolute paths', async () => { + await collectChunks( + new CopilotProvider().sendQuery('hi', workDir, undefined, { + model: 'gpt-5', + nodeConfig: { skills: ['alpha', 'beta'] }, + }) + ); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + const dirs = cfg.skillDirectories as string[] | undefined; + expect(dirs).toHaveLength(2); + expect(dirs?.[0]).toContain(join('.agents', 'skills', 'alpha')); + expect(dirs?.[1]).toContain(join('.claude', 'skills', 'beta')); + }); + + test('warns on missing skills but 
still attaches resolved ones', async () => { + const chunks = await collectChunks( + new CopilotProvider().sendQuery('hi', workDir, undefined, { + model: 'gpt-5', + nodeConfig: { skills: ['alpha', 'does-not-exist'] }, + }) + ); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + const dirs = cfg.skillDirectories as string[] | undefined; + expect(dirs).toHaveLength(1); + expect(chunks).toContainEqual( + expect.objectContaining({ + type: 'system', + content: expect.stringContaining('does-not-exist'), + }) + ); + }); + + test('omits skillDirectories entirely when nothing resolves', async () => { + await collectChunks( + new CopilotProvider().sendQuery('hi', workDir, undefined, { + model: 'gpt-5', + nodeConfig: { skills: ['nope'] }, + }) + ); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + expect(cfg.skillDirectories).toBeUndefined(); + }); +}); diff --git a/packages/providers/src/community/copilot/structured-output.test.ts b/packages/providers/src/community/copilot/structured-output.test.ts new file mode 100644 index 0000000000..78b3914580 --- /dev/null +++ b/packages/providers/src/community/copilot/structured-output.test.ts @@ -0,0 +1,199 @@ +import { beforeEach, describe, expect, mock, test } from 'bun:test'; + +const mockLogger = { + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(function (this: unknown) { + return this; + }), + bindings: mock(() => ({ module: 'test' })), + isLevelEnabled: mock(() => true), + level: 'info', +}; + +mock.module('@archon/paths', () => ({ + createLogger: () => mockLogger, +})); + +mock.module('./binary-resolver', () => ({ + resolveCopilotCliPath: async () => '/usr/local/bin/copilot', +})); + +type SessionHandler = (event: Record) => void; + +let registeredHandlers: Record = {}; +let 
scriptedFinalMessage: { data: { content: string; messageId: string } } | undefined; +let sendAndWaitImpl: + | (() => Promise<{ data: { content: string; messageId: string } } | undefined>) + | undefined; +let capturedSendPrompt: string | undefined; + +const mockSendAndWait = mock(async (input: { prompt: string }) => { + capturedSendPrompt = input.prompt; + if (sendAndWaitImpl) return await sendAndWaitImpl(); + return scriptedFinalMessage; +}); + +const mockSession = { + sessionId: 'copilot-session-struct', + on: mock((eventType: string, handler: SessionHandler) => { + registeredHandlers[eventType] ??= []; + registeredHandlers[eventType].push(handler); + return () => undefined; + }), + sendAndWait: mockSendAndWait, + abort: mock(async () => undefined), + disconnect: mock(async () => undefined), +}; + +mock.module('@github/copilot-sdk', () => ({ + approveAll: () => ({ kind: 'approved' }), + CopilotClient: class MockCopilotClient { + createSession = mock(async () => mockSession); + resumeSession = mock(async () => mockSession); + stop = mock(async () => []); + }, +})); + +import { CopilotProvider } from './provider'; + +function emit(eventType: string, data: Record): void { + for (const handler of registeredHandlers[eventType] ?? 
[]) { + handler({ + id: crypto.randomUUID(), + timestamp: new Date().toISOString(), + parentId: null, + type: eventType, + data, + }); + } +} + +async function collect(generator: AsyncGenerator): Promise { + const chunks: unknown[] = []; + for await (const chunk of generator) chunks.push(chunk); + return chunks; +} + +function firstResult(chunks: unknown[]): Record | undefined { + return chunks.find( + (c): c is Record => + typeof c === 'object' && c !== null && (c as { type?: string }).type === 'result' + ); +} + +describe('Copilot structured output', () => { + beforeEach(() => { + registeredHandlers = {}; + scriptedFinalMessage = { data: { content: '', messageId: 'final' } }; + sendAndWaitImpl = undefined; + capturedSendPrompt = undefined; + mockSendAndWait.mockClear(); + }); + + test('passes prompt through unchanged when outputFormat is absent', async () => { + await collect(new CopilotProvider().sendQuery('plain prompt', '/repo')); + expect(capturedSendPrompt).toBe('plain prompt'); + }); + + test('augments prompt with schema when outputFormat is set', async () => { + await collect( + new CopilotProvider().sendQuery('give me users', '/repo', undefined, { + outputFormat: { + type: 'json_schema', + schema: { type: 'object', properties: { count: { type: 'number' } } }, + }, + }) + ); + expect(capturedSendPrompt).toContain('give me users'); + expect(capturedSendPrompt).toContain('Respond with ONLY a JSON object'); + expect(capturedSendPrompt).toContain('"count"'); + }); + + test('attaches structuredOutput on valid JSON transcript', async () => { + sendAndWaitImpl = async () => { + emit('assistant.message_delta', { + messageId: 'm1', + deltaContent: '{"count": 3, "ok": true}', + }); + return scriptedFinalMessage; + }; + + const chunks = await collect( + new CopilotProvider().sendQuery('q', '/repo', undefined, { + outputFormat: { type: 'json_schema', schema: {} }, + }) + ); + + const result = firstResult(chunks); + expect(result?.structuredOutput).toEqual({ count: 3, 
ok: true }); + }); + + test('strips ```json fences before parsing', async () => { + sendAndWaitImpl = async () => { + emit('assistant.message_delta', { + messageId: 'm1', + deltaContent: '```json\n{"x": 1}\n```', + }); + return scriptedFinalMessage; + }; + + const chunks = await collect( + new CopilotProvider().sendQuery('q', '/repo', undefined, { + outputFormat: { type: 'json_schema', schema: {} }, + }) + ); + + expect(firstResult(chunks)?.structuredOutput).toEqual({ x: 1 }); + }); + + test('omits structuredOutput when transcript is unparseable', async () => { + sendAndWaitImpl = async () => { + emit('assistant.message_delta', { + messageId: 'm1', + deltaContent: 'this is not JSON', + }); + return scriptedFinalMessage; + }; + + const chunks = await collect( + new CopilotProvider().sendQuery('q', '/repo', undefined, { + outputFormat: { type: 'json_schema', schema: {} }, + }) + ); + + const result = firstResult(chunks); + expect(result).toBeDefined(); + expect(result).not.toHaveProperty('structuredOutput'); + }); + + test('omits structuredOutput when outputFormat is absent', async () => { + sendAndWaitImpl = async () => { + emit('assistant.message_delta', { + messageId: 'm1', + deltaContent: '{"valid": "json"}', + }); + return scriptedFinalMessage; + }; + + const chunks = await collect(new CopilotProvider().sendQuery('q', '/repo')); + const result = firstResult(chunks); + expect(result).not.toHaveProperty('structuredOutput'); + }); + + test('parses from final assistant message fallback', async () => { + scriptedFinalMessage = { data: { content: '{"v": 42}', messageId: 'final' } }; + + const chunks = await collect( + new CopilotProvider().sendQuery('q', '/repo', undefined, { + outputFormat: { type: 'json_schema', schema: {} }, + }) + ); + + expect(firstResult(chunks)?.structuredOutput).toEqual({ v: 42 }); + }); +}); diff --git a/packages/providers/src/community/copilot/tool-restrictions.test.ts b/packages/providers/src/community/copilot/tool-restrictions.test.ts new 
file mode 100644 index 0000000000..96e4951e93 --- /dev/null +++ b/packages/providers/src/community/copilot/tool-restrictions.test.ts @@ -0,0 +1,144 @@ +import { beforeEach, describe, expect, mock, test } from 'bun:test'; + +const mockLogger = { + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(function (this: unknown) { + return this; + }), + bindings: mock(() => ({ module: 'test' })), + isLevelEnabled: mock(() => true), + level: 'info', +}; + +mock.module('@archon/paths', () => ({ + createLogger: () => mockLogger, +})); + +mock.module('./binary-resolver', () => ({ + resolveCopilotCliPath: async () => '/usr/local/bin/copilot', +})); + +type SessionHandler = (event: Record) => void; + +let registeredHandlers: Record = {}; +const mockSession = { + sessionId: 'copilot-session-tools', + on: mock((eventType: string, handler: SessionHandler) => { + registeredHandlers[eventType] ??= []; + registeredHandlers[eventType].push(handler); + return () => undefined; + }), + sendAndWait: mock(async () => ({ + data: { content: 'ok', messageId: 'm' }, + })), + abort: mock(async () => undefined), + disconnect: mock(async () => undefined), +}; + +const capturedSessionConfigs: Array> = []; +const mockCreateSession = mock(async (config: Record) => { + capturedSessionConfigs.push(config); + return mockSession; +}); +const mockResumeSession = mock(async (_id: string, config: Record) => { + capturedSessionConfigs.push(config); + return mockSession; +}); + +mock.module('@github/copilot-sdk', () => ({ + approveAll: () => ({ kind: 'approved' }), + CopilotClient: class MockCopilotClient { + createSession = mockCreateSession; + resumeSession = mockResumeSession; + stop = mock(async () => []); + }, +})); + +import { CopilotProvider } from './provider'; + +async function drain(generator: AsyncGenerator): Promise { + for await (const _chunk of 
generator) void _chunk; +} + +describe('applyToolRestrictions', () => { + beforeEach(() => { + registeredHandlers = {}; + capturedSessionConfigs.length = 0; + mockCreateSession.mockClear(); + mockResumeSession.mockClear(); + }); + + test('omits availableTools/excludedTools when nodeConfig has neither', async () => { + await drain(new CopilotProvider().sendQuery('hi', '/repo', undefined, { model: 'gpt-5' })); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + expect(cfg.availableTools).toBeUndefined(); + expect(cfg.excludedTools).toBeUndefined(); + }); + + test('passes allowed_tools through as availableTools', async () => { + await drain( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + model: 'gpt-5', + nodeConfig: { allowed_tools: ['read_file', 'write_file'] }, + }) + ); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + expect(cfg.availableTools).toEqual(['read_file', 'write_file']); + expect(cfg.excludedTools).toBeUndefined(); + }); + + test('passes denied_tools through as excludedTools', async () => { + await drain( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + model: 'gpt-5', + nodeConfig: { denied_tools: ['shell'] }, + }) + ); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + expect(cfg.excludedTools).toEqual(['shell']); + expect(cfg.availableTools).toBeUndefined(); + }); + + test('passes both through when both present (SDK enforces availableTools precedence)', async () => { + await drain( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + model: 'gpt-5', + nodeConfig: { + allowed_tools: ['read_file'], + denied_tools: ['shell'], + }, + }) + ); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + expect(cfg.availableTools).toEqual(['read_file']); + expect(cfg.excludedTools).toEqual(['shell']); + }); + + test('applies restrictions on 
resumeSession path too', async () => { + await drain( + new CopilotProvider().sendQuery('hi', '/repo', 'resume-abc', { + model: 'gpt-5', + nodeConfig: { allowed_tools: ['read_file'] }, + }) + ); + + expect(capturedSessionConfigs).toHaveLength(1); + const cfg = capturedSessionConfigs[0]!; + expect(cfg.availableTools).toEqual(['read_file']); + expect(mockResumeSession).toHaveBeenCalledTimes(1); + expect(mockCreateSession).toHaveBeenCalledTimes(0); + }); +}); diff --git a/packages/providers/src/community/pi/event-bridge.ts b/packages/providers/src/community/pi/event-bridge.ts index 4adde52809..e19f3b683b 100644 --- a/packages/providers/src/community/pi/event-bridge.ts +++ b/packages/providers/src/community/pi/event-bridge.ts @@ -151,55 +151,11 @@ export function buildResultChunk(messages: readonly unknown[]): MessageChunk { return chunk; } -/** - * Attempt to parse a Pi assistant transcript as the structured-output JSON - * requested via `outputFormat`. Handles three common model failure modes: - * - trailing/leading whitespace (always stripped) - * - markdown code fences (```json ... ``` or bare ``` ... ```) that models - * emit despite the "no code fences" instruction in the prompt - * - prose preamble followed by a single trailing JSON object — pattern - * observed on Minimax M2.7 ("Now I have all the inputs. Let me evaluate - * the three gates: ... {...}"). Reasoning models tend to "think out loud" - * before emitting structured output despite explicit JSON-only prompts. - * - * Returns the parsed value on success, `undefined` on any failure. Callers - * treat `undefined` as "structured output unavailable" and degrade via the - * dag-executor's existing missing-structured-output warning. - */ -export function tryParseStructuredOutput(text: string): unknown { - const trimmed = text.trim(); - if (trimmed.length === 0) return undefined; - // Strip ```json / ``` fences if present. 
Match only at boundaries so we - // don't mangle JSON strings that legitimately contain backticks. - const cleaned = trimmed - .replace(/^```(?:json)?\s*\n?/i, '') - .replace(/\n?\s*```\s*$/, '') - .trim(); - - // Tier 1: clean parse — fast path for fully compliant outputs. - try { - return JSON.parse(cleaned); - } catch { - // fall through - } - - // Tier 2: scan forward to the FIRST `{` and parse from there. Recovers the - // preamble-then-JSON pattern reasoning models emit. A backward scan from - // the last `{` was considered but rejected: it silently returns the wrong - // object when the prose contains a brace-bearing example after the real - // payload (e.g. `{"actual":1}\nFor example: {"x":2}` would yield `{x:2}`), - // breaking the conservative-failure contract callers rely on. - const firstBrace = cleaned.indexOf('{'); - if (firstBrace > 0) { - try { - return JSON.parse(cleaned.slice(firstBrace)); - } catch { - // fall through - } - } - - return undefined; -} +// Structured-output parsing is shared across providers. Import once for +// local use and re-export so Pi's existing tests and callers don't need to +// change import paths. +import { tryParseStructuredOutput } from '../../shared/structured-output'; +export { tryParseStructuredOutput }; /** * Pure mapper from Pi's `AgentSessionEvent` → zero-or-more Archon `MessageChunk`s. 
diff --git a/packages/providers/src/community/pi/options-translator.ts b/packages/providers/src/community/pi/options-translator.ts index d970985f4e..61d15f7e53 100644 --- a/packages/providers/src/community/pi/options-translator.ts +++ b/packages/providers/src/community/pi/options-translator.ts @@ -1,7 +1,3 @@ -import { existsSync } from 'node:fs'; -import { homedir } from 'node:os'; -import { join } from 'node:path'; - import { codingTools, createBashTool, @@ -249,79 +245,11 @@ export function resolvePiTools( // ─── Skills ──────────────────────────────────────────────────────────────── -export interface ResolvedSkills { - /** Absolute paths to resolved skill directories. Each contains a SKILL.md. */ - paths: string[]; - /** Skill names that couldn't be resolved in any search location. */ - missing: string[]; -} - -/** - * Pi's skill-discovery search order for a named skill. Mirrors the locations - * Claude's SDK and Pi's default resource loader both respect, so Archon - * workflows that already work under Claude find the same skills under Pi. - * - * Order (first match wins per name): - * 1. `/.agents/skills//` — project-local, agentskills.io standard - * 2. `/.claude/skills//` — project-local, Claude convention - * 3. `~/.agents/skills//` — user-global, agentskills.io standard - * 4. `~/.claude/skills//` — user-global, Claude convention - * - * Ancestor traversal above cwd is deliberately not done in v2 — matches the - * Pi provider's cwd-bound scope and avoids ambiguity about which repo's - * skills win when Archon runs out of a subdirectory. - */ -function skillSearchRoots(cwd: string): string[] { - // Prefer `HOME` env var when set — Bun's os.homedir() bypasses `HOME` and - // reads from the system uid lookup, which is correct in production but - // makes tests using staged temp homes impossible. The fallback to - // homedir() keeps behavior identical in non-test contexts. - const home = process.env.HOME ?? 
homedir(); - return [ - join(cwd, '.agents', 'skills'), - join(cwd, '.claude', 'skills'), - join(home, '.agents', 'skills'), - join(home, '.claude', 'skills'), - ]; -} - -/** - * Resolve Archon's name-based `skills:` nodeConfig references to absolute - * directory paths Pi's resource loader can consume via `additionalSkillPaths`. - * - * Each named skill is expected to be a directory containing a `SKILL.md` - * file — the agentskills.io standard layout. - */ -export function resolvePiSkills(cwd: string, skillNames: string[] | undefined): ResolvedSkills { - if (!skillNames || skillNames.length === 0) { - return { paths: [], missing: [] }; - } - - const roots = skillSearchRoots(cwd); - const paths: string[] = []; - const missing: string[] = []; - const seen = new Set(); - - for (const rawName of skillNames) { - if (typeof rawName !== 'string' || rawName.length === 0) continue; - if (seen.has(rawName)) continue; - seen.add(rawName); - - let found: string | undefined; - for (const root of roots) { - const candidate = join(root, rawName); - if (existsSync(join(candidate, 'SKILL.md'))) { - found = candidate; - break; - } - } - - if (found) { - paths.push(found); - } else { - missing.push(rawName); - } - } - - return { paths, missing }; -} +// Skill resolution is shared across providers. Pi re-exports the shared +// implementation under its historical name to avoid churning callers and +// tests; new providers should import `resolveSkillDirectories` directly +// from `../../shared/skills`. 
+export { + resolveSkillDirectories as resolvePiSkills, + type ResolvedSkills, +} from '../../shared/skills'; diff --git a/packages/providers/src/community/pi/provider.ts b/packages/providers/src/community/pi/provider.ts index 5a14ed6166..c240747bf1 100644 --- a/packages/providers/src/community/pi/provider.ts +++ b/packages/providers/src/community/pi/provider.ts @@ -94,29 +94,11 @@ function getLog(): ReturnType { return cachedLog; } -/** - * Append a "respond with JSON matching this schema" instruction to the user - * prompt so Pi-backed models produce parseable structured output. Pi's SDK - * has no JSON-mode equivalent to Claude's outputFormat or Codex's - * outputSchema, so this is a best-effort fallback: the event bridge parses - * the assistant transcript on agent_end. Models that reliably follow - * instruction (GPT-5, Claude, Gemini 2.x, recent Qwen Coder, DeepSeek V3) - * return clean JSON; models that don't produce a parse failure, which the - * executor surfaces via the existing dag.structured_output_missing warning. - */ -export function augmentPromptForJsonSchema( - prompt: string, - schema: Record -): string { - return `${prompt} - ---- - -CRITICAL: Respond with ONLY a JSON object matching the schema below. No prose before or after the JSON. No markdown code fences. Just the raw JSON object as your final message. - -Schema: -${JSON.stringify(schema, null, 2)}`; -} +// Structured-output prompt augmentation is shared across providers. Import +// once for local use and re-export so existing callers and tests keep their +// import path stable; new providers should import from `../../shared/structured-output`. 
+import { augmentPromptForJsonSchema } from '../../shared/structured-output'; +export { augmentPromptForJsonSchema }; /** * Pi community provider — wraps `@mariozechner/pi-coding-agent`'s full diff --git a/packages/providers/src/index.ts b/packages/providers/src/index.ts index d430f8d402..0cc32e73cb 100644 --- a/packages/providers/src/index.ts +++ b/packages/providers/src/index.ts @@ -47,6 +47,14 @@ export { resolveCodexBinaryPath, fileExists as codexFileExists } from './codex/b export { resolveClaudeBinaryPath, fileExists as claudeFileExists } from './claude/binary-resolver'; // Community providers +export { + CopilotProvider, + parseCopilotConfig, + registerCopilotProvider, + resolveCopilotCliPath, + type CopilotProviderDefaults, +} from './community/copilot'; + export { PiProvider, parsePiConfig, diff --git a/packages/providers/src/registry.test.ts b/packages/providers/src/registry.test.ts index ee3e04ee04..7e2d9b992b 100644 --- a/packages/providers/src/registry.test.ts +++ b/packages/providers/src/registry.test.ts @@ -11,6 +11,7 @@ import { registerCommunityProviders, clearRegistry, } from './registry'; +import { registerCopilotProvider } from './community/copilot/registration'; import { registerPiProvider } from './community/pi/registration'; import { UnknownProviderError } from './errors'; import type { ProviderRegistration, IAgentProvider, ProviderCapabilities } from './types'; @@ -252,19 +253,70 @@ describe('registry', () => { describe('registerCommunityProviders (aggregator)', () => { test('registers all bundled community providers', () => { registerCommunityProviders(); - // Pi is currently the only community provider bundled. When more are - // added, they should appear here automatically. + // Bundled community providers should appear here automatically. 
+ expect(isRegisteredProvider('copilot')).toBe(true); expect(isRegisteredProvider('pi')).toBe(true); }); test('is idempotent', () => { registerCommunityProviders(); expect(() => registerCommunityProviders()).not.toThrow(); + const copilotCount = getRegisteredProviders().filter(p => p.id === 'copilot').length; const piCount = getRegisteredProviders().filter(p => p.id === 'pi').length; + expect(copilotCount).toBe(1); expect(piCount).toBe(1); }); }); + describe('registerCopilotProvider (community provider)', () => { + test('registers copilot with builtIn: false', () => { + registerCopilotProvider(); + const reg = getRegistration('copilot'); + expect(reg.id).toBe('copilot'); + expect(reg.displayName).toBe('GitHub Copilot (community)'); + expect(reg.builtIn).toBe(false); + }); + + test('is idempotent', () => { + registerCopilotProvider(); + expect(() => registerCopilotProvider()).not.toThrow(); + const entries = getRegisteredProviders().filter(p => p.id === 'copilot'); + expect(entries).toHaveLength(1); + }); + + test('declares conservative capabilities', () => { + registerCopilotProvider(); + const caps = getProviderCapabilities('copilot'); + expect(caps.sessionResume).toBe(true); + expect(caps.envInjection).toBe(true); + expect(caps.effortControl).toBe(true); + expect(caps.thinkingControl).toBe(true); + expect(caps.mcp).toBe(true); + expect(caps.hooks).toBe(false); + expect(caps.skills).toBe(true); + expect(caps.toolRestrictions).toBe(true); + expect(caps.structuredOutput).toBe(true); + expect(caps.agents).toBe(true); + expect(caps.fallbackModel).toBe(false); + expect(caps.sandbox).toBe(false); + }); + + test('appears in getProviderInfoList with builtIn: false', () => { + registerCopilotProvider(); + const info = getProviderInfoList().find(p => p.id === 'copilot'); + expect(info).toBeDefined(); + expect(info?.builtIn).toBe(false); + }); + + test('does not collide with built-ins', () => { + registerCopilotProvider(); + const ids = getRegisteredProviders() + .map(p => 
p.id) + .sort(); + expect(ids).toEqual(['claude', 'codex', 'copilot']); + }); + }); + describe('registerPiProvider (community provider)', () => { test('registers pi with builtIn: false', () => { registerPiProvider(); diff --git a/packages/providers/src/registry.ts b/packages/providers/src/registry.ts index 7006ab4961..00466febf7 100644 --- a/packages/providers/src/registry.ts +++ b/packages/providers/src/registry.ts @@ -17,6 +17,7 @@ import { ClaudeProvider } from './claude/provider'; import { CodexProvider } from './codex/provider'; import { CLAUDE_CAPABILITIES } from './claude/capabilities'; import { CODEX_CAPABILITIES } from './codex/capabilities'; +import { registerCopilotProvider } from './community/copilot/registration'; import { registerPiProvider } from './community/pi/registration'; import { UnknownProviderError } from './errors'; import { createLogger } from '@archon/paths'; @@ -152,6 +153,7 @@ export function registerBuiltinProviders(): void { * disappear. */ export function registerCommunityProviders(): void { + registerCopilotProvider(); registerPiProvider(); } diff --git a/packages/providers/src/shared/skills.ts b/packages/providers/src/shared/skills.ts new file mode 100644 index 0000000000..bc9c361300 --- /dev/null +++ b/packages/providers/src/shared/skills.ts @@ -0,0 +1,84 @@ +import { existsSync } from 'node:fs'; +import { homedir } from 'node:os'; +import { join } from 'node:path'; + +export interface ResolvedSkills { + /** Absolute paths to resolved skill directories. Each contains a SKILL.md. */ + paths: string[]; + /** Skill names that couldn't be resolved in any search location. */ + missing: string[]; +} + +/** + * Skill-discovery search order for a named skill. Mirrors the locations + * Claude's SDK and Pi's default resource loader both respect, so Archon + * workflows that already work under Claude find the same skills under any + * provider that calls this resolver. + * + * Order (first match wins per name): + * 1. 
`/.agents/skills//` — project-local, agentskills.io standard + * 2. `/.claude/skills//` — project-local, Claude convention + * 3. `~/.agents/skills//` — user-global, agentskills.io standard + * 4. `~/.claude/skills//` — user-global, Claude convention + * + * Ancestor traversal above cwd is deliberately not done — matches Pi's + * cwd-bound scope and avoids ambiguity about which repo's skills win when + * Archon runs out of a subdirectory. + */ +function skillSearchRoots(cwd: string): string[] { + // Prefer `HOME` env var when set — Bun's os.homedir() bypasses `HOME` and + // reads from the system uid lookup, which is correct in production but + // makes tests using staged temp homes impossible. + const home = process.env.HOME ?? homedir(); + return [ + join(cwd, '.agents', 'skills'), + join(cwd, '.claude', 'skills'), + join(home, '.agents', 'skills'), + join(home, '.claude', 'skills'), + ]; +} + +/** + * Resolve Archon's name-based `skills:` nodeConfig references to absolute + * directory paths. Each named skill is expected to be a directory containing + * a `SKILL.md` file — the agentskills.io standard layout. + * + * Duplicate names are de-duped; empty/non-string entries are skipped. + * Unresolved names are returned in `missing` for caller-side warning. 
+ */ +export function resolveSkillDirectories( + cwd: string, + skillNames: string[] | undefined +): ResolvedSkills { + if (!skillNames || skillNames.length === 0) { + return { paths: [], missing: [] }; + } + + const roots = skillSearchRoots(cwd); + const paths: string[] = []; + const missing: string[] = []; + const seen = new Set(); + + for (const rawName of skillNames) { + if (typeof rawName !== 'string' || rawName.length === 0) continue; + if (seen.has(rawName)) continue; + seen.add(rawName); + + let found: string | undefined; + for (const root of roots) { + const candidate = join(root, rawName); + if (existsSync(join(candidate, 'SKILL.md'))) { + found = candidate; + break; + } + } + + if (found) { + paths.push(found); + } else { + missing.push(rawName); + } + } + + return { paths, missing }; +} diff --git a/packages/providers/src/shared/structured-output.ts b/packages/providers/src/shared/structured-output.ts new file mode 100644 index 0000000000..302cf21d41 --- /dev/null +++ b/packages/providers/src/shared/structured-output.ts @@ -0,0 +1,94 @@ +/** + * Shared best-effort structured-output helpers for providers that have no + * native JSON-mode equivalent to Claude's `outputFormat` or Codex's + * `outputSchema`. The approach is two-step: + * + * 1. Augment the user prompt with a "respond with JSON matching this schema" + * instruction, so instruction-following models emit parseable JSON. + * 2. After the run completes, parse the accumulated assistant transcript. + * + * Models that reliably follow instruction (GPT-5, Claude, Gemini 2.x, recent + * Qwen Coder, DeepSeek V3) return clean JSON; models that don't produce a + * parse failure, which the executor surfaces via the existing + * `dag.structured_output_missing` warning. + */ + +/** + * Append a "respond with JSON matching this schema" instruction to the user + * prompt. Same wording originally authored for Pi — reused verbatim so + * prompt drift across providers is zero. 
+ */ +export function augmentPromptForJsonSchema( + prompt: string, + schema: Record +): string { + return `${prompt} + +--- + +CRITICAL: Respond with ONLY a JSON object matching the schema below. No prose before or after the JSON. No markdown code fences. Just the raw JSON object as your final message. + +Schema: +${JSON.stringify(schema, null, 2)}`; +} + +/** + * Attempt to parse an assistant transcript as the structured-output JSON. + * Handles three common model failure modes: + * - trailing/leading whitespace (always stripped) + * - markdown code fences (```json ... ``` or bare ``` ... ```) that models + * emit despite the "no code fences" instruction in the prompt + * - prose preamble followed by a single trailing JSON object — pattern + * observed on Minimax M2.7 ("Now I have all the inputs. Let me evaluate + * the three gates: ... {...}"). Reasoning models tend to "think out loud" + * before emitting structured output despite explicit JSON-only prompts. + * + * Returns the parsed value on success, `undefined` on any failure. Callers + * treat `undefined` as "structured output unavailable" and degrade via the + * dag-executor's existing missing-structured-output warning. + */ +export function tryParseStructuredOutput(text: string): unknown { + const trimmed = text.trim(); + if (trimmed.length === 0) return undefined; + // Strip ```json / ``` fences if present. Match only at boundaries so we + // don't mangle JSON strings that legitimately contain backticks. + const cleaned = trimmed + .replace(/^```(?:json)?\s*\n?/i, '') + .replace(/\n?\s*```\s*$/, '') + .trim(); + + // Tier 1: clean parse — fast path for fully compliant outputs. + const tier1 = tryJsonParseObject(cleaned); + if (tier1 !== undefined) return tier1; + + // Tier 2: scan forward to the FIRST `{` and parse from there. Recovers the + // preamble-then-JSON pattern reasoning models emit. 
A backward scan from + // the last `{` was considered but rejected: it silently returns the wrong + // object when the prose contains a brace-bearing example after the real + // payload (e.g. `{"actual":1}\nFor example: {"x":2}` would yield `{x:2}`), + // breaking the conservative-failure contract callers rely on. + const firstBrace = cleaned.indexOf('{'); + if (firstBrace > 0) { + const tier2 = tryJsonParseObject(cleaned.slice(firstBrace)); + if (tier2 !== undefined) return tier2; + } + + return undefined; +} + +/** + * Parse `text` as JSON and only return it if the result is a non-null + * object (or array). Schema augmentation always asks for an object — bare + * `null`, numbers, and strings parse cleanly but are not "structured + * output", so we treat them as missing and let the dag-executor's + * structured_output_missing path engage. + */ +function tryJsonParseObject(text: string): unknown { + try { + const parsed: unknown = JSON.parse(text); + if (parsed === null || typeof parsed !== 'object') return undefined; + return parsed; + } catch { + return undefined; + } +}