diff --git a/.archon/workflows/test-workflows/e2e-copilot-abort.yaml b/.archon/workflows/test-workflows/e2e-copilot-abort.yaml new file mode 100644 index 0000000000..d5b620dd7b --- /dev/null +++ b/.archon/workflows/test-workflows/e2e-copilot-abort.yaml @@ -0,0 +1,14 @@ +# E2E manual abort test — GitHub Copilot community provider +# Verifies: Ctrl-C propagates through the bridge to session.abort() and +# sendAndWait unwinds cleanly without dangling listeners. +# Manual: start, wait for streaming to begin, press Ctrl-C. Not for CI. +name: e2e-copilot-abort +description: 'Manual test: start, then Ctrl-C. Verifies abort wiring.' +provider: copilot +model: gpt-5-mini + +nodes: + - id: long + prompt: 'Count slowly from 1 to 200, one number per line, with a brief phrase after each number explaining its mathematical significance. Do not skip any numbers.' + effort: low + idle_timeout: 120000 diff --git a/.archon/workflows/test-workflows/e2e-copilot-all-nodes-smoke.yaml b/.archon/workflows/test-workflows/e2e-copilot-all-nodes-smoke.yaml new file mode 100644 index 0000000000..d62e6380a9 --- /dev/null +++ b/.archon/workflows/test-workflows/e2e-copilot-all-nodes-smoke.yaml @@ -0,0 +1,147 @@ +# E2E smoke test — Copilot provider, every CI-compatible node type +# Covers: prompt, command, loop (AI node types) + bash, script bun/uv +# (deterministic node types) + depends_on / when / trigger_rule / $nodeId.output +# (DAG features) + Copilot-specific options: effort, allowed_tools, +# output_format (best-effort JSON via prompt augment + 2-tier parser). +# Skipped: `approval:` — pauses for human input, incompatible with CI. +# Auth: `gh auth login` OR `COPILOT_GITHUB_TOKEN`. +# To use `GH_TOKEN` / `GITHUB_TOKEN`, also set `assistantConfig.useLoggedInUser: false`. +# Requires an active GitHub Copilot subscription. +name: e2e-copilot-all-nodes-smoke +description: 'Copilot provider smoke across every CI-compatible node type plus Copilot-specific options.' 
+provider: copilot +model: gpt-5-mini + +nodes: + # ─── AI node types ────────────────────────────────────────────────────── + + # 1. prompt: inline prompt + effort + allowed_tools (no tool calls). + # Verifies reasoningEffort and availableTools=[] reach the SDK. + - id: prompt-node + prompt: "Reply with exactly the single word 'ok' and nothing else." + allowed_tools: [] + effort: low + idle_timeout: 30000 + + # 2. command: named command file (.archon/commands/e2e-echo-command.md). + # The command echoes back $ARGUMENTS (the workflow invocation message). + - id: command-node + command: e2e-echo-command + allowed_tools: [] + idle_timeout: 30000 + + # 3. loop: iterative AI prompt until completion signal. + # Bounded by max_iterations: 2 so a misbehaving model can't hang CI. + - id: loop-node + loop: + prompt: "Reply with exactly 'DONE' and nothing else." + until: 'DONE' + max_iterations: 2 + allowed_tools: [] + effort: low + idle_timeout: 60000 + + # 4. output_format: Copilot's best-effort structured output path + # (prompt augmented with schema + 2-tier JSON parser on result text). + # Unique to Copilot/Pi vs. Claude/Codex native JSON mode — only an + # E2E test catches "real model drifted around the schema". + - id: structured-node + prompt: | + Return a JSON object with two fields, no fences and no prose: + - "status": always "ok" (string) + - "value": always 42 (number) + allowed_tools: [] + effort: low + idle_timeout: 30000 + output_format: + type: object + properties: + status: + type: string + value: + type: number + required: [status, value] + + # ─── Deterministic node types (no AI) ─────────────────────────────────── + + # 5. bash: shell script with JSON output (enables $nodeId.output.status + # dot-access downstream). + - id: bash-json-node + bash: 'echo ''{"status":"ok"}''' + + # 6. script: bun (TypeScript/JavaScript runtime) + - id: script-bun-node + script: echo-args + runtime: bun + timeout: 30000 + + # 7. 
script: uv (Python runtime) + - id: script-python-node + script: echo-py + runtime: uv + timeout: 30000 + + # ─── DAG features ─────────────────────────────────────────────────────── + + # 8. depends_on + $nodeId.output substitution + - id: downstream + bash: "echo 'downstream got: $prompt-node.output'" + depends_on: [prompt-node] + + # 9. when: conditional (JSON dot-access on bash JSON output) + - id: gated + bash: "echo 'gated-ok'" + depends_on: [bash-json-node] + when: "$bash-json-node.output.status == 'ok'" + + # 10. when: conditional on AI structured output (proves output_format + # parsed and dot-access works on the resulting object). + - id: structured-check + bash: "echo \"structured.status=$structured-node.output.status\"" + depends_on: [structured-node] + when: "$structured-node.output.status == 'ok'" + + # 11. trigger_rule: merge multiple deps (all_success semantics) + - id: merge + bash: "echo 'merge-ok'" + depends_on: + [downstream, gated, structured-check, script-bun-node, script-python-node] + trigger_rule: all_success + + # ─── Final assertion ──────────────────────────────────────────────────── + + # 12. Verify every upstream node produced non-empty output, including + # dot-access on the structured-output node (proves output_format + # parsed and downstream consumers can index into it). + # Note: value-equality on string fields is avoided on purpose — + # shellQuote() wraps strings in literal single quotes, so a literal + # `[ "$x" != "ok" ]` would always fail. Non-emptiness is the right + # bar for a smoke; the `when:` gate on structured-check already + # proved the value matched 'ok' to reach this node. 
+ - id: assert + bash: | + fail=0 + check() { + local name="$1" + local value="$2" + if [ -z "$value" ]; then + echo "FAIL: $name produced empty output" + fail=1 + fi + } + check prompt-node "$prompt-node.output" + check command-node "$command-node.output" + check loop-node "$loop-node.output" + check bash-json-node "$bash-json-node.output" + check script-bun-node "$script-bun-node.output" + check script-python-node "$script-python-node.output" + check downstream "$downstream.output" + check gated "$gated.output" + check merge "$merge.output" + check structured.status "$structured-node.output.status" + check structured.value "$structured-node.output.value" + + if [ "$fail" -eq 1 ]; then exit 1; fi + echo "PASS: all node types + structured output verified" + depends_on: [merge, loop-node, command-node] + trigger_rule: all_success diff --git a/.env.example b/.env.example index 125ad43e98..fbbb2af82f 100644 --- a/.env.example +++ b/.env.example @@ -38,6 +38,18 @@ CODEX_REFRESH_TOKEN= CODEX_ACCOUNT_ID= # CODEX_BIN_PATH= # Optional: path to Codex native binary (binary builds only) +# GitHub Copilot (community provider — @github/copilot-sdk) +# Requires an active GitHub Copilot subscription. By default, Archon uses +# the credentials you configured via the Copilot CLI (`copilot login`). +# Generic GH_TOKEN / GITHUB_TOKEN (declared below) are intentionally NOT +# picked up — classic PATs lack Copilot entitlement and would fail. To +# opt back into env-token auth, set `useLoggedInUser: false` in +# `.archon/config.yaml`. Setting COPILOT_GITHUB_TOKEN is treated as +# explicit Copilot intent and always wins. +# +# COPILOT_GITHUB_TOKEN= # Copilot-scoped PAT (always wins when set) +# COPILOT_BIN_PATH= # Optional: path to Copilot CLI binary (binary builds only) + # Pi (community provider — @mariozechner/pi-coding-agent) # One adapter, ~20 LLM backends. 
Archon's Pi adapter picks up credentials # you've already configured via the Pi CLI (`pi /login` writes to @@ -57,7 +69,7 @@ CODEX_ACCOUNT_ID= # OPENROUTER_API_KEY= # Pi provider id: openrouter # HUGGINGFACE_API_KEY= # Pi provider id: huggingface -# Default AI Assistant (must match a registered provider, e.g. claude, codex, pi) +# Default AI Assistant (must match a registered provider, e.g. claude, codex, copilot, pi) # Used for new conversations when no codebase specified — errors on unknown values DEFAULT_AI_ASSISTANT=claude diff --git a/bun.lock b/bun.lock index 7f15ead093..1a5a2c1f00 100644 --- a/bun.lock +++ b/bun.lock @@ -1,6 +1,6 @@ { "lockfileVersion": 1, "configVersion": 1, "workspaces": { "": { "name": "archon", @@ -130,6 +130,7 @@ "dependencies": { "@anthropic-ai/claude-agent-sdk": "^0.2.121", "@archon/paths": "workspace:*", + "@github/copilot-sdk": "~0.2.2", "@mariozechner/pi-ai": "^0.67.5", "@mariozechner/pi-coding-agent": "^0.67.5", "@openai/codex-sdk": "^0.125.0", @@ -545,6 +546,22 @@ "@floating-ui/utils": ["@floating-ui/utils@0.2.11", "", {}, "sha512-RiB/yIh78pcIxl6lLMG0CgBXAZ2Y0eVHqMPYugu+9U0AeT6YBeiJpf7lbdJNIugFP5SIjwNRgo4DhR1Qxi26Gg=="], + "@github/copilot": ["@github/copilot@1.0.34", "", { "optionalDependencies": { "@github/copilot-darwin-arm64": "1.0.34", "@github/copilot-darwin-x64": "1.0.34", "@github/copilot-linux-arm64": "1.0.34", "@github/copilot-linux-x64": "1.0.34", "@github/copilot-win32-arm64": "1.0.34", "@github/copilot-win32-x64": "1.0.34" }, "bin": { "copilot": "npm-loader.js" } }, "sha512-jFYulj1v00b3j43Er9+WwhZ/XldGq7+gti2s2pRhrdPwYEd1PMvscDZwRa/1iUBz/XQ5HUGac1tD8P7+VUpWjg=="], + + "@github/copilot-darwin-arm64": ["@github/copilot-darwin-arm64@1.0.34", "", { "os": "darwin", "cpu": "arm64", "bin": { "copilot-darwin-arm64": "copilot" } }, "sha512-g94EhSLd3a6fckZ6xb/zP2DZJZEx7kONWdOoDiHXUtSqc4RiZ7OBq1EwT4WrPY1lsmy9sioJIcZSGzJd0C1M7Q=="], + + "@github/copilot-darwin-x64": ["@github/copilot-darwin-x64@1.0.34",
"", { "os": "darwin", "cpu": "x64", "bin": { "copilot-darwin-x64": "copilot" } }, "sha512-tIgFEZV0ohCF/VgTODJWre3xURsvEd+6IPN/HPKWxG6AXtJOxzjlr5kLYYdPHdNlHNmSxGQw8fWsN2FZ4nyDdw=="], + + "@github/copilot-linux-arm64": ["@github/copilot-linux-arm64@1.0.34", "", { "os": "linux", "cpu": "arm64", "bin": { "copilot-linux-arm64": "copilot" } }, "sha512-feqjEetrlqBUhYskIsPmwACQOWO99cvRpKwIFl3OlEjWoj+//HA7yXh49UIe0gD8wQUI8hy05uVz3K2/xti2nQ=="], + + "@github/copilot-linux-x64": ["@github/copilot-linux-x64@1.0.34", "", { "os": "linux", "cpu": "x64", "bin": { "copilot-linux-x64": "copilot" } }, "sha512-3l0rZZqmceklHizJaaO+Iy2PsAZpVZS9Mn9VYnVcY/8Yzt4Y2hmXSFcKVfc4l+JlhFsPs7trhMdIkfwkjaKPLg=="], + + "@github/copilot-sdk": ["@github/copilot-sdk@0.2.2", "", { "dependencies": { "@github/copilot": "^1.0.21", "vscode-jsonrpc": "^8.2.1", "zod": "^4.3.6" } }, "sha512-VZCqS08YlUM90bUKJ7VLeIxgTTEHtfXBo84T1IUMNvXRREX2csjPH6Z+CPw3S2468RcCLvzBXcc9LtJJTLIWFw=="], + + "@github/copilot-win32-arm64": ["@github/copilot-win32-arm64@1.0.34", "", { "os": "win32", "cpu": "arm64", "bin": { "copilot-win32-arm64": "copilot.exe" } }, "sha512-06kEJO3iyohmAqF4iIbOxOfWLFSIpLDJ1L1oEHRtouMrH2Ll1wrUjsoQT1gXgBOv7rifl25qx/Avx5zKqvuORw=="], + + "@github/copilot-win32-x64": ["@github/copilot-win32-x64@1.0.34", "", { "os": "win32", "cpu": "x64", "bin": { "copilot-win32-x64": "copilot.exe" } }, "sha512-QLL8pS4q2TTyQbClEXxqXtQGPr4lk+pwc8hPMUL7iw7HGDOvs1WCLMT1ZSDPPcxSrTnR/dURX5za1NMA8uF/fw=="], + "@google/genai": ["@google/genai@1.50.1", "", { "dependencies": { "google-auth-library": "^10.3.0", "p-retry": "^4.6.2", "protobufjs": "^7.5.4", "ws": "^8.18.0" }, "peerDependencies": { "@modelcontextprotocol/sdk": "^1.25.2" }, "optionalPeers": ["@modelcontextprotocol/sdk"] }, "sha512-YbkX7H9+1Pt8wOt7DDREy8XSoiL6fRDzZQRyaVBarFf8MR3zHGqVdvM4cLbDXqPhxqvegZShgfxb8kw9C7YhAQ=="], "@grammyjs/types": ["@grammyjs/types@3.26.0", "", {}, "sha512-jlnyfxfev/2o68HlvAGRocAXgdPPX5QabG7jZlbqC2r9DZyWBfzTlg+nu3O3Fy4EhgLWu28hZ/8wr7DsNamP9A=="], 
@@ -2749,6 +2766,8 @@ "vitefu": ["vitefu@1.1.3", "", { "peerDependencies": { "vite": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0" }, "optionalPeers": ["vite"] }, "sha512-ub4okH7Z5KLjb6hDyjqrGXqWtWvoYdU3IGm/NorpgHncKoLTCfRIbvlhBm7r0YstIaQRYlp4yEbFqDcKSzXSSg=="], + "vscode-jsonrpc": ["vscode-jsonrpc@8.2.1", "", {}, "sha512-kdjOSJ2lLIn7r1rtrMbbNCHjyMPfRnowdKjBQ+mGq6NAW5QY2bEZC/khaC5OR8svbbjvLEaIXkOq45e2X9BIbQ=="], + "web-namespaces": ["web-namespaces@2.0.1", "", {}, "sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ=="], "web-streams-polyfill": ["web-streams-polyfill@3.3.3", "", {}, "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw=="], @@ -2851,6 +2870,8 @@ "@expressive-code/plugin-shiki/shiki": ["shiki@3.23.0", "", { "dependencies": { "@shikijs/core": "3.23.0", "@shikijs/engine-javascript": "3.23.0", "@shikijs/engine-oniguruma": "3.23.0", "@shikijs/langs": "3.23.0", "@shikijs/themes": "3.23.0", "@shikijs/types": "3.23.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4" } }, "sha512-55Dj73uq9ZXL5zyeRPzHQsK7Nbyt6Y10k5s7OjuFZGMhpp4r/rsLBH0o/0fstIzX1Lep9VxefWljK/SKCzygIA=="], + "@github/copilot-sdk/zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], + "@inquirer/core/wrap-ansi": ["wrap-ansi@6.2.0", "", { "dependencies": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", "strip-ansi": "^6.0.0" } }, "sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA=="], "@mariozechner/pi-ai/@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.90.0", "", { "dependencies": { "json-schema-to-ts": "^3.1.1" }, "peerDependencies": { "zod": "^3.25.0 || ^4.0.0" }, "optionalPeers": ["zod"], "bin": { "anthropic-ai-sdk": "bin/cli" } }, "sha512-MzZtPabJF1b0FTDl6Z6H5ljphPwACLGP13lu8MTiB8jXaW/YXlpOp+Po2cVou3MPM5+f5toyLnul9whKCy7fBg=="], diff --git 
a/packages/core/src/config/config-loader.ts b/packages/core/src/config/config-loader.ts index 4bf22d9144..1b0c672bf5 100644 --- a/packages/core/src/config/config-loader.ts +++ b/packages/core/src/config/config-loader.ts @@ -99,6 +99,7 @@ const SAFE_ASSISTANT_FIELDS: Record = { // community providers — list each field we're confident is safe to // show in the web UI. Unknown providers fall through with no fields. pi: ['model'], + copilot: ['model'], }; function toSafeAssistantDefaults(assistants: AssistantDefaults): SafeConfig['assistants'] { diff --git a/packages/core/src/config/config-types.ts b/packages/core/src/config/config-types.ts index 63dd135907..a24a415b5b 100644 --- a/packages/core/src/config/config-types.ts +++ b/packages/core/src/config/config-types.ts @@ -16,6 +16,7 @@ import type { ClaudeProviderDefaults, CodexProviderDefaults, + CopilotProviderDefaults, PiProviderDefaults, ProviderDefaultsMap, } from '@archon/providers/types'; @@ -23,6 +24,7 @@ import type { export type { ClaudeProviderDefaults, CodexProviderDefaults, + CopilotProviderDefaults, PiProviderDefaults, ProviderDefaultsMap, }; diff --git a/packages/docs-web/src/content/docs/getting-started/ai-assistants.md b/packages/docs-web/src/content/docs/getting-started/ai-assistants.md index de4004a6ba..dd49871651 100644 --- a/packages/docs-web/src/content/docs/getting-started/ai-assistants.md +++ b/packages/docs-web/src/content/docs/getting-started/ai-assistants.md @@ -1,6 +1,6 @@ --- title: AI Assistants -description: Configure Claude Code, Codex, and Pi as AI assistants for Archon. +description: Configure Claude Code, Codex, GitHub Copilot, and Pi as AI assistants for Archon. category: getting-started area: clients audience: [user] @@ -9,7 +9,7 @@ sidebar: order: 4 --- -You must configure **at least one** AI assistant. All three can be configured and mixed within workflows. +You must configure **at least one** AI assistant. All four can be configured and mixed within workflows. 
## Claude Code @@ -389,6 +389,95 @@ Unsupported YAML fields trigger a visible warning from the dag-executor when the - [Adding a Community Provider](../contributing/adding-a-community-provider/) — the contributor-facing guide for extending Archon with your own provider. - [Pi on GitHub](https://github.com/badlogic/pi-mono) — upstream project. +## GitHub Copilot (Community Provider) + +**Use a GitHub Copilot subscription inside Archon workflows.** Drives the Copilot CLI via `@github/copilot-sdk`, supporting OpenAI, Anthropic via BYOK, Gemini, and the other models Copilot exposes — switch between them with the `model` field. + +Copilot is registered as `builtIn: false` — like Pi, a bundled community provider rather than a core built-in. + +### Install + +For source installs (`bun run`), the SDK + its bundled CLI dependency come along with `bun install` — nothing extra to do. + +For compiled Archon binaries, install the Copilot CLI yourself and point Archon at it: + +```bash +npm install -g @github/copilot +``` + +Then tell Archon where the binary lives (the resolver searches these in order): + +```ini +# .env +COPILOT_BIN_PATH=/absolute/path/to/copilot +``` + +```yaml +# .archon/config.yaml +assistants: + copilot: + copilotCliPath: /absolute/path/to/copilot +``` + +Or place the binary at `~/.archon/vendor/copilot/copilot` (POSIX) / `~/.archon/vendor/copilot/copilot.exe` (Windows) and the resolver picks it up automatically. + +### Authenticate + +By default, Copilot uses the credentials from your local `copilot login`. Generic `GH_TOKEN` / `GITHUB_TOKEN` env vars are **not** picked up automatically — classic GitHub PATs lack Copilot entitlement and would fail with a misleading SDK error. Auth precedence (highest to lowest): + +1. **`COPILOT_GITHUB_TOKEN`** (env) — always wins when set; treated as explicit Copilot intent +2. **`useLoggedInUser: false`** in `.archon/config.yaml` — opts into env-token auth, including generic `GH_TOKEN` / `GITHUB_TOKEN` +3. 
**`copilot login` credentials** — the default + +An active GitHub Copilot subscription is required for any of these to work. + +### Copilot Configuration Options + +You can configure Copilot's behavior in `.archon/config.yaml`: + +```yaml +assistants: + copilot: + model: gpt-5-mini # 'gpt-5', 'gpt-5-mini', 'claude-sonnet-4.5', 'auto', etc. + modelReasoningEffort: medium # 'low' | 'medium' | 'high' | 'xhigh' | 'max' (alias for xhigh) + # configDir: /absolute/path/to/copilot-config + # enableConfigDiscovery: false # only enable for trusted repos — bypasses Archon's workflow MCP/skill validation + # useLoggedInUser: false # opt into env-token auth (GH_TOKEN / GITHUB_TOKEN); default uses `copilot login` + # logLevel: error # 'none' | 'error' | 'warning' | 'info' | 'debug' | 'all' +``` + +Copilot accepts OpenAI models (`gpt-5`, `gpt-5-mini`), Anthropic via BYOK (`claude-sonnet-4.5`), Gemini, and more. When no model is configured, Archon passes `model: 'auto'` and Copilot picks. + +### Supported Archon Features + +| Feature | Support | Notes | +|---|---|---| +| Session resume | ✅ | Returns `sessionId`; reused on resume | +| Reasoning control | ✅ | `effort:` / string `thinking:` → Copilot `reasoningEffort`; `max` maps to SDK `xhigh` | +| System prompt override | ✅ | `systemPrompt:` | +| Codebase env vars | ✅ | merged into the spawned Copilot CLI environment | +| Tool restrictions | ✅ | `allowed_tools` → `availableTools`, `denied_tools` → `excludedTools` | +| MCP servers | ✅ | `mcp: path/to/servers.json` → `SessionConfig.mcpServers` (env vars `$FOO` expanded; missing vars warned) | +| Skills | ✅ | `skills: [name]` resolved from `.agents/skills/` or `.claude/skills/` (project or home) → `SessionConfig.skillDirectories` | +| Structured output | ✅ | best-effort via prompt augmentation; unparseable output degrades to dag-executor's missing-output warning | +| Sub-agents (`agents:`) | ✅ | `name`/`description`/`prompt`/`tools` → `SessionConfig.customAgents`; Claude-specific 
fields (`model`, `disallowedTools`, `skills`, `maxTurns`) warn per agent and are ignored | +| Fork-session retry | ⚠️ | Copilot SDK has no fork API — when Archon requests a fork (on retry), we create a fresh session and emit a system-chunk warning | +| Hooks | ❌ | Archon hooks ≠ Copilot's `SessionHooks` event vocabulary | +| Fallback model | ❌ | not wired | +| Cost control | ❌ | no cost-limit API | +| Sandbox | ❌ | Copilot permissions surface is separate from Archon's sandbox model | + +### Set as Default (Optional) + +```ini +DEFAULT_AI_ASSISTANT=copilot +``` + +### See also + +- [Adding a Community Provider](../contributing/adding-a-community-provider/) — the contributor-facing guide for extending Archon with your own provider. +- [`@github/copilot-sdk`](https://www.npmjs.com/package/@github/copilot-sdk) — upstream SDK. + ## How Assistant Selection Works - Assistant type is set per codebase via the `assistant` field in `.archon/config.yaml` or the `DEFAULT_AI_ASSISTANT` env var diff --git a/packages/providers/package.json b/packages/providers/package.json index d59911b9a6..44118e113b 100644 --- a/packages/providers/package.json +++ b/packages/providers/package.json @@ -14,16 +14,19 @@ "./codex/config": "./src/codex/config.ts", "./codex/binary-resolver": "./src/codex/binary-resolver.ts", "./community/pi": "./src/community/pi/index.ts", + "./community/copilot": "./src/community/copilot/index.ts", + "./community/copilot/binary-resolver": "./src/community/copilot/binary-resolver.ts", "./errors": "./src/errors.ts", "./registry": "./src/registry.ts" }, "scripts": { - "test": "bun test src/claude/provider.test.ts && bun test src/codex/provider.test.ts && bun test src/registry.test.ts && bun test src/codex/binary-guard.test.ts && bun test src/codex/binary-resolver.test.ts && bun test src/codex/binary-resolver-dev.test.ts && bun test src/claude/binary-resolver.test.ts && bun test src/claude/binary-resolver-dev.test.ts && bun test src/community/pi/model-ref.test.ts && bun 
test src/community/pi/config.test.ts && bun test src/community/pi/event-bridge.test.ts && bun test src/community/pi/options-translator.test.ts && bun test src/community/pi/session-resolver.test.ts && bun test src/community/pi/provider.test.ts && bun test src/community/pi/provider-lazy-load.test.ts", + "test": "bun test src/claude/provider.test.ts && bun test src/codex/provider.test.ts && bun test src/registry.test.ts && bun test src/codex/binary-guard.test.ts && bun test src/codex/binary-resolver.test.ts && bun test src/codex/binary-resolver-dev.test.ts && bun test src/claude/binary-resolver.test.ts && bun test src/claude/binary-resolver-dev.test.ts && bun test src/community/pi/model-ref.test.ts && bun test src/community/pi/config.test.ts && bun test src/community/pi/event-bridge.test.ts && bun test src/community/pi/options-translator.test.ts && bun test src/community/pi/session-resolver.test.ts && bun test src/community/pi/provider.test.ts && bun test src/community/pi/provider-lazy-load.test.ts && bun test src/community/copilot/config.test.ts && bun test src/community/copilot/event-bridge.test.ts && bun test src/community/copilot/binary-resolver-dev.test.ts && bun test src/community/copilot/binary-resolver.test.ts && bun test src/community/copilot/provider.test.ts && bun test src/community/copilot/provider-lazy-load.test.ts && bun test src/community/copilot/provider-hardening.test.ts", "type-check": "bun x tsc --noEmit" }, "dependencies": { "@anthropic-ai/claude-agent-sdk": "^0.2.121", "@archon/paths": "workspace:*", + "@github/copilot-sdk": "~0.2.2", "@mariozechner/pi-ai": "^0.67.5", "@mariozechner/pi-coding-agent": "^0.67.5", "@openai/codex-sdk": "^0.125.0", diff --git a/packages/providers/src/community/copilot/binary-resolver-dev.test.ts b/packages/providers/src/community/copilot/binary-resolver-dev.test.ts new file mode 100644 index 0000000000..99f87769c0 --- /dev/null +++ b/packages/providers/src/community/copilot/binary-resolver-dev.test.ts @@ -0,0 +1,26 @@ 
+/** + * Tests for the Copilot binary resolver in dev mode (BUNDLED_IS_BINARY=false). + * Separate file because binary-mode tests mock BUNDLED_IS_BINARY=true. + */ +import { describe, test, expect, mock } from 'bun:test'; +import { createMockLogger } from '../../test/mocks/logger'; + +mock.module('@archon/paths', () => ({ + createLogger: mock(() => createMockLogger()), + BUNDLED_IS_BINARY: false, + getArchonHome: mock(() => '/tmp/test-archon-home'), +})); + +import { resolveCopilotBinaryPath } from './binary-resolver'; + +describe('resolveCopilotBinaryPath (dev mode)', () => { + test('returns undefined when BUNDLED_IS_BINARY is false', async () => { + const result = await resolveCopilotBinaryPath(); + expect(result).toBeUndefined(); + }); + + test('returns undefined even with config path set', async () => { + const result = await resolveCopilotBinaryPath('/some/custom/path'); + expect(result).toBeUndefined(); + }); +}); diff --git a/packages/providers/src/community/copilot/binary-resolver.test.ts b/packages/providers/src/community/copilot/binary-resolver.test.ts new file mode 100644 index 0000000000..34250b7b41 --- /dev/null +++ b/packages/providers/src/community/copilot/binary-resolver.test.ts @@ -0,0 +1,235 @@ +/** + * Tests for the Copilot binary resolver in binary mode. + * + * Must run in its own bun test invocation because it mocks @archon/paths + * with BUNDLED_IS_BINARY=true, which conflicts with dev-mode tests. 
+ */ +import { describe, test, expect, mock, beforeEach, afterAll, spyOn } from 'bun:test'; +import { createMockLogger } from '../../test/mocks/logger'; + +const mockLogger = createMockLogger(); + +mock.module('@archon/paths', () => ({ + createLogger: mock(() => mockLogger), + BUNDLED_IS_BINARY: true, + getArchonHome: mock(() => '/tmp/test-archon-home'), +})); + +import * as resolver from './binary-resolver'; + +describe('resolveCopilotBinaryPath (binary mode)', () => { + const originalEnv = process.env.COPILOT_BIN_PATH; + let fileExistsSpy: ReturnType; + let isExecutableFileSpy: ReturnType; + + beforeEach(() => { + delete process.env.COPILOT_BIN_PATH; + fileExistsSpy?.mockRestore(); + isExecutableFileSpy?.mockRestore(); + mockLogger.info.mockClear(); + }); + + afterAll(() => { + if (originalEnv !== undefined) { + process.env.COPILOT_BIN_PATH = originalEnv; + } else { + delete process.env.COPILOT_BIN_PATH; + } + fileExistsSpy?.mockRestore(); + isExecutableFileSpy?.mockRestore(); + }); + + test('uses COPILOT_BIN_PATH env var when set and file is executable', async () => { + process.env.COPILOT_BIN_PATH = '/usr/local/bin/copilot'; + isExecutableFileSpy = spyOn(resolver, 'isExecutableFile').mockReturnValue(true); + + const result = await resolver.resolveCopilotBinaryPath(); + expect(result).toBe('/usr/local/bin/copilot'); + }); + + test('throws when COPILOT_BIN_PATH is set but path is not executable', async () => { + process.env.COPILOT_BIN_PATH = '/nonexistent/copilot'; + isExecutableFileSpy = spyOn(resolver, 'isExecutableFile').mockReturnValue(false); + + await expect(resolver.resolveCopilotBinaryPath()).rejects.toThrow('is not an executable file'); + }); + + test('uses config cliPath when file is executable', async () => { + isExecutableFileSpy = spyOn(resolver, 'isExecutableFile').mockReturnValue(true); + + const result = await resolver.resolveCopilotBinaryPath('/custom/copilot/path'); + expect(result).toBe('/custom/copilot/path'); + }); + + test('throws when 
config cliPath is not executable', async () => { + isExecutableFileSpy = spyOn(resolver, 'isExecutableFile').mockReturnValue(false); + + await expect(resolver.resolveCopilotBinaryPath('/nonexistent/copilot')).rejects.toThrow( + 'is not an executable file' + ); + }); + + test('env var takes precedence over config path', async () => { + process.env.COPILOT_BIN_PATH = '/env/copilot'; + isExecutableFileSpy = spyOn(resolver, 'isExecutableFile').mockReturnValue(true); + + const result = await resolver.resolveCopilotBinaryPath('/config/copilot'); + expect(result).toBe('/env/copilot'); + }); + + test('checks vendor directory when no env or config path', async () => { + isExecutableFileSpy = spyOn(resolver, 'isExecutableFile').mockImplementation((path: string) => { + const normalized = path.replace(/\\/g, '/'); + return normalized.includes('vendor/copilot'); + }); + + const result = await resolver.resolveCopilotBinaryPath(); + expect(typeof result).toBe('string'); + const normalized = result!.replace(/\\/g, '/'); + expect(normalized).toContain('/tmp/test-archon-home/vendor/copilot/'); + }); + + test('autodetects npm global install at ~/.npm-global/bin/copilot (POSIX)', async () => { + if (process.platform === 'win32') return; + const home = process.env.HOME ?? 
'/Users/test'; + const expected = `${home}/.npm-global/bin/copilot`; + isExecutableFileSpy = spyOn(resolver, 'isExecutableFile').mockImplementation( + (path: string) => path === expected + ); + + const result = await resolver.resolveCopilotBinaryPath(); + expect(result).toBe(expected); + expect(mockLogger.info).toHaveBeenCalledWith( + { source: 'autodetect' }, + 'copilot.binary_resolved' + ); + }); + + test('autodetects homebrew install on Apple Silicon', async () => { + if (process.platform !== 'darwin' || process.arch !== 'arm64') return; + isExecutableFileSpy = spyOn(resolver, 'isExecutableFile').mockImplementation( + (path: string) => path === '/opt/homebrew/bin/copilot' + ); + + const result = await resolver.resolveCopilotBinaryPath(); + expect(result).toBe('/opt/homebrew/bin/copilot'); + expect(mockLogger.info).toHaveBeenCalledWith( + { source: 'autodetect' }, + 'copilot.binary_resolved' + ); + }); + + test('autodetects system install at /usr/local/bin/copilot', async () => { + if (process.platform === 'win32') return; + isExecutableFileSpy = spyOn(resolver, 'isExecutableFile').mockImplementation( + (path: string) => path === '/usr/local/bin/copilot' + ); + + const result = await resolver.resolveCopilotBinaryPath(); + expect(result).toBe('/usr/local/bin/copilot'); + }); + + test('vendor directory takes precedence over autodetect', async () => { + isExecutableFileSpy = spyOn(resolver, 'isExecutableFile').mockImplementation((path: string) => { + const normalized = path.replace(/\\/g, '/'); + return normalized.includes('vendor/copilot') || normalized.includes('.npm-global'); + }); + + const result = await resolver.resolveCopilotBinaryPath(); + expect(result!.replace(/\\/g, '/')).toContain('/vendor/copilot/'); + expect(mockLogger.info).toHaveBeenCalledWith( + expect.objectContaining({ source: 'vendor' }), + 'copilot.binary_resolved' + ); + }); + + test('falls back to PATH lookup when no canonical path matches', async () => { + const pathResult = 
'/some/non-canonical/bin/copilot'; + // Tiers 3/4 use isExecutableFile; return false for all except the PATH result so they fall + // through to the PATH tier, then return true so the PATH result is accepted. + isExecutableFileSpy = spyOn(resolver, 'isExecutableFile').mockImplementation( + (path: string) => path === pathResult + ); + const resolveFromPathSpy = spyOn(resolver, 'resolveFromPath').mockReturnValue(pathResult); + + try { + const result = await resolver.resolveCopilotBinaryPath(); + expect(result).toBe(pathResult); + expect(mockLogger.info).toHaveBeenCalledWith({ source: 'path' }, 'copilot.binary_resolved'); + } finally { + resolveFromPathSpy.mockRestore(); + } + }); + + test('rejects PATH lookup result that is not executable', async () => { + // PATH returned a stale shim or non-exec file — must NOT be returned; + // resolver must continue to the install-instructions throw. + fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(false); + const resolveFromPathSpy = spyOn(resolver, 'resolveFromPath').mockReturnValue( + '/stale/shim/copilot' + ); + isExecutableFileSpy = spyOn(resolver, 'isExecutableFile').mockReturnValue(false); + + try { + await expect(resolver.resolveCopilotBinaryPath()).rejects.toThrow( + 'Copilot CLI binary not found' + ); + } finally { + resolveFromPathSpy.mockRestore(); + } + }); + + test('throws with install instructions when binary not found anywhere', async () => { + isExecutableFileSpy = spyOn(resolver, 'isExecutableFile').mockReturnValue(false); + const resolveFromPathSpy = spyOn(resolver, 'resolveFromPath').mockReturnValue(undefined); + + try { + await expect(resolver.resolveCopilotBinaryPath()).rejects.toThrow( + 'Copilot CLI binary not found' + ); + } finally { + resolveFromPathSpy.mockRestore(); + } + }); +}); + +describe('isExecutableFile', () => { + // These tests run real fs ops against fixtures in os.tmpdir(). They exercise + // the actual statSync / accessSync code path rather than mocking fs. 
+ const fs = require('node:fs') as typeof import('node:fs'); + const os = require('node:os') as typeof import('node:os'); + const path = require('node:path') as typeof import('node:path'); + + const tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'archon-copilot-resolver-')); + const execFile = path.join(tmpRoot, 'has-exec-bit'); + const noExecFile = path.join(tmpRoot, 'no-exec-bit'); + const dirPath = path.join(tmpRoot, 'a-directory'); + const missingPath = path.join(tmpRoot, 'does-not-exist'); + + fs.writeFileSync(execFile, '#!/bin/sh\necho hi\n'); + fs.chmodSync(execFile, 0o755); + fs.writeFileSync(noExecFile, 'plain text\n'); + fs.chmodSync(noExecFile, 0o644); + fs.mkdirSync(dirPath); + + afterAll(() => { + fs.rmSync(tmpRoot, { recursive: true, force: true }); + }); + + test('returns true for a regular file with the exec bit set', () => { + expect(resolver.isExecutableFile(execFile)).toBe(true); + }); + + test('returns false for a regular file without the exec bit (POSIX only)', () => { + if (process.platform === 'win32') return; // win32 has no Unix exec bits + expect(resolver.isExecutableFile(noExecFile)).toBe(false); + }); + + test('returns false for a directory', () => { + expect(resolver.isExecutableFile(dirPath)).toBe(false); + }); + + test('returns false for a missing path', () => { + expect(resolver.isExecutableFile(missingPath)).toBe(false); + }); +}); diff --git a/packages/providers/src/community/copilot/binary-resolver.ts b/packages/providers/src/community/copilot/binary-resolver.ts new file mode 100644 index 0000000000..a80490797b --- /dev/null +++ b/packages/providers/src/community/copilot/binary-resolver.ts @@ -0,0 +1,205 @@ +/** + * Copilot CLI binary resolver for compiled (bun --compile) archon binaries. + * + * The @github/copilot-sdk bundles @github/copilot (the CLI) as a transitive + * dep, and by default the SDK resolves the binary from its own bundled copy + * via `import.meta.url`. 
In compiled archon binaries that path is frozen to + * the build host's filesystem, so we resolve explicitly and pass the result + * via `new CopilotClient({ cliPath })`. + * + * Resolution order: + * 1. `COPILOT_BIN_PATH` environment variable + * 2. `assistants.copilot.copilotCliPath` in config + * 3. `~/.archon/vendor/copilot/` (user-placed) + * 4. Autodetect canonical install paths (npm prefix defaults per platform) + * 5. PATH lookup via `which` / `where` + * 6. Throw with install instructions + * + * Mirrors `codex/binary-resolver.ts` and `claude/binary-resolver.ts`. + */ +import { + accessSync as _accessSync, + constants as fsConstants, + existsSync as _existsSync, + statSync as _statSync, +} from 'node:fs'; +import { execFileSync as _execFileSync } from 'node:child_process'; +import { homedir } from 'node:os'; +import { join } from 'node:path'; +import { BUNDLED_IS_BINARY, getArchonHome, createLogger } from '@archon/paths'; + +/** + * Resolve `copilot` via the OS path lookup (`which` / `where`). Wrapper is + * exported so tests can spy on it without spawning real subprocesses. + * Returns the first hit on PATH, or undefined when the lookup yields nothing + * or fails (the lookup tool itself missing, etc.). + */ +export function resolveFromPath(): string | undefined { + const lookupCmd = process.platform === 'win32' ? 'where' : 'which'; + // 'where copilot' (no .exe) resolves npm shims (.cmd) and .exe; 'copilot.exe' alone misses them. + const executable = 'copilot'; + try { + const output = _execFileSync(lookupCmd, [executable], { + encoding: 'utf-8', + stdio: ['ignore', 'pipe', 'ignore'], + }).trim(); + const first = output.split(/\r?\n/)[0]?.trim(); + return first || undefined; + } catch { + return undefined; + } +} + +/** Wrapper for existsSync — enables spyOn in tests (direct imports can't be spied on). 
*/ +export function fileExists(path: string): boolean { + return _existsSync(path); +} + +/** + * True if `path` is a regular file the current user can execute. On win32, + * Node's `stat.mode` does not track Unix exec bits, so we fall back to + * "is a file" — which matches how `where` / `PATH` resolution works there. + * + * Use for env- and config-supplied paths so a user pointing at a directory + * or a non-executable file fails loudly at resolve time, before the SDK + * tries to spawn it. + */ +export function isExecutableFile(path: string): boolean { + try { + const stat = _statSync(path); + if (!stat.isFile()) return false; + if (process.platform === 'win32') return true; + // accessSync(X_OK) checks current-user executability — `mode & 0o111` + // alone proves *some* exec bit exists (e.g., mode 001 fails for owner). + _accessSync(path, fsConstants.X_OK); + return true; + } catch { + return false; + } +} + +let cachedLog: ReturnType<typeof createLogger> | undefined; +function getLog(): ReturnType<typeof createLogger> { + if (!cachedLog) cachedLog = createLogger('copilot-binary'); + return cachedLog; +} + +const COPILOT_VENDOR_DIR = 'vendor/copilot'; +const SUPPORTED_PLATFORMS = ['darwin', 'linux', 'win32']; + +function getVendorBinaryName(): string | undefined { + if (!SUPPORTED_PLATFORMS.includes(process.platform)) return undefined; + if (process.arch !== 'x64' && process.arch !== 'arm64') return undefined; + return process.platform === 'win32' ? 'copilot.exe' : 'copilot'; +} + +/** + * Resolve the path to the Copilot CLI binary. + * + * In dev mode: returns undefined (SDK resolves via its bundled CLI). + * In binary mode: env / config / vendor / autodetect, else throw. + */ +export async function resolveCopilotBinaryPath( + configCliPath?: string +): Promise<string | undefined> { + if (!BUNDLED_IS_BINARY) return undefined; + + // 1.
Environment variable override + const envPath = process.env.COPILOT_BIN_PATH; + if (envPath) { + if (!isExecutableFile(envPath)) { + throw new Error( + `COPILOT_BIN_PATH is set to "${envPath}" but it is not an executable file.\n` + + 'Please verify the path points to the Copilot CLI executable (chmod +x if needed).' + ); + } + getLog().info({ source: 'env' }, 'copilot.binary_resolved'); + return envPath; + } + + // 2. Config file override + if (configCliPath) { + if (!isExecutableFile(configCliPath)) { + throw new Error( + `assistants.copilot.copilotCliPath is set to "${configCliPath}" but it is not an executable file.\n` + + 'Please verify the path in .archon/config.yaml points to the Copilot CLI executable (chmod +x if needed).' + ); + } + getLog().info({ source: 'config' }, 'copilot.binary_resolved'); + return configCliPath; + } + + // 3. Vendor directory (user-placed) + const binaryName = getVendorBinaryName(); + if (binaryName) { + const archonHome = getArchonHome(); + const vendorBinaryPath = join(archonHome, COPILOT_VENDOR_DIR, binaryName); + if (isExecutableFile(vendorBinaryPath)) { + getLog().info({ source: 'vendor' }, 'copilot.binary_resolved'); + return vendorBinaryPath; + } + } + + // 4. Autodetect canonical install paths + const autodetectPaths = getAutodetectPaths(); + for (const probePath of autodetectPaths) { + if (isExecutableFile(probePath)) { + getLog().info({ source: 'autodetect' }, 'copilot.binary_resolved'); + return probePath; + } + } + + // 5. PATH lookup via which/where — catches non-canonical installs + // (volta, asdf, fnm, custom prefixes, etc.) the canonical-paths list + // can't enumerate. Validate with isExecutableFile so a stale shim doesn't + // hand back a non-executable path. + const fromPath = resolveFromPath(); + if (fromPath && isExecutableFile(fromPath)) { + getLog().info({ source: 'path' }, 'copilot.binary_resolved'); + return fromPath; + } + + // 6. 
Not found — throw with install instructions + const vendorPath = `~/.archon/${COPILOT_VENDOR_DIR}/`; + throw new Error( + 'Copilot CLI binary not found. The Copilot provider requires the\n' + + '@github/copilot CLI, which cannot be resolved automatically in\n' + + 'compiled Archon builds.\n\n' + + 'To fix, choose one of:\n' + + ' 1. Install globally: npm install -g @github/copilot\n' + + ' Then set: COPILOT_BIN_PATH=$(which copilot)\n\n' + + ` 2. Place the binary at: ${vendorPath}\n\n` + + ' 3. Set the path in config:\n' + + ' # .archon/config.yaml\n' + + ' assistants:\n' + + ' copilot:\n' + + ' copilotCliPath: /path/to/copilot\n' + ); +} + +/** + * Canonical install locations probed by tier 4 autodetect. Grounded in + * npm's global-install contract (the binary lands at `{npm_prefix}/bin/` + * on POSIX, `{npm_prefix}\.cmd` on Windows). + */ +function getAutodetectPaths(): string[] { + const paths: string[] = []; + + if (process.platform === 'win32') { + const appData = process.env.APPDATA; + if (appData) paths.push(join(appData, 'npm', 'copilot.cmd')); + paths.push(join(homedir(), '.npm-global', 'copilot.cmd')); + return paths; + } + + // POSIX (macOS + Linux) + paths.push(join(homedir(), '.npm-global', 'bin', 'copilot')); + + if (process.platform === 'darwin' && process.arch === 'arm64') { + paths.push('/opt/homebrew/bin/copilot'); + } + + paths.push('/usr/local/bin/copilot'); + + return paths; +} diff --git a/packages/providers/src/community/copilot/capabilities.ts b/packages/providers/src/community/copilot/capabilities.ts new file mode 100644 index 0000000000..10b020daae --- /dev/null +++ b/packages/providers/src/community/copilot/capabilities.ts @@ -0,0 +1,28 @@ +import type { ProviderCapabilities } from '../../types'; + +/** + * Copilot capabilities — each flag declares behavior that is wired end-to-end + * through `provider.ts` (translation + SDK integration) and `event-bridge.ts` + * (streaming). 
Flipping a flag to `true` suppresses the dag-executor's + * per-capability warning, so keep each flag honest. + * + * `effortControl` + `thinkingControl` are both true because Copilot's + * `reasoningEffort` gates both the model's reasoning budget and the + * `assistant.reasoning_delta` event stream — one SDK axis that covers both + * Archon concepts. + */ +export const COPILOT_CAPABILITIES: ProviderCapabilities = { + sessionResume: true, + mcp: true, + hooks: false, + skills: true, + agents: true, + toolRestrictions: true, + structuredOutput: true, + envInjection: true, + costControl: false, + effortControl: true, + thinkingControl: true, + fallbackModel: false, + sandbox: false, +}; diff --git a/packages/providers/src/community/copilot/config.test.ts b/packages/providers/src/community/copilot/config.test.ts new file mode 100644 index 0000000000..991d0f044c --- /dev/null +++ b/packages/providers/src/community/copilot/config.test.ts @@ -0,0 +1,119 @@ +import { describe, expect, test } from 'bun:test'; + +import { parseCopilotConfig } from './config'; + +describe('parseCopilotConfig', () => { + test('returns empty object for empty input', () => { + expect(parseCopilotConfig({})).toEqual({}); + }); + + test('parses valid model string', () => { + expect(parseCopilotConfig({ model: 'gpt-5' })).toEqual({ model: 'gpt-5' }); + }); + + test('drops non-string model silently', () => { + expect(parseCopilotConfig({ model: 123 })).toEqual({}); + expect(parseCopilotConfig({ model: null })).toEqual({}); + expect(parseCopilotConfig({ model: [] })).toEqual({}); + }); + + test('parses each valid reasoning effort value', () => { + for (const v of ['low', 'medium', 'high', 'xhigh'] as const) { + expect(parseCopilotConfig({ modelReasoningEffort: v })).toEqual({ + modelReasoningEffort: v, + }); + } + }); + + test('drops unknown reasoning effort value', () => { + expect(parseCopilotConfig({ modelReasoningEffort: 'minimal' })).toEqual({}); + expect(parseCopilotConfig({ 
modelReasoningEffort: 'extreme' })).toEqual({}); + expect(parseCopilotConfig({ modelReasoningEffort: 42 })).toEqual({}); + }); + + test('normalizes Archon alias `max` to SDK `xhigh`', () => { + expect(parseCopilotConfig({ modelReasoningEffort: 'max' })).toEqual({ + modelReasoningEffort: 'xhigh', + }); + }); + + test('parses copilotCliPath string', () => { + expect(parseCopilotConfig({ copilotCliPath: '/usr/local/bin/copilot' })).toEqual({ + copilotCliPath: '/usr/local/bin/copilot', + }); + }); + + test('drops non-string copilotCliPath', () => { + expect(parseCopilotConfig({ copilotCliPath: 42 })).toEqual({}); + }); + + test('parses configDir string', () => { + expect(parseCopilotConfig({ configDir: '/tmp/copilot-config' })).toEqual({ + configDir: '/tmp/copilot-config', + }); + }); + + test('parses enableConfigDiscovery boolean', () => { + expect(parseCopilotConfig({ enableConfigDiscovery: true })).toEqual({ + enableConfigDiscovery: true, + }); + expect(parseCopilotConfig({ enableConfigDiscovery: false })).toEqual({ + enableConfigDiscovery: false, + }); + }); + + test('drops non-boolean enableConfigDiscovery', () => { + expect(parseCopilotConfig({ enableConfigDiscovery: 'yes' })).toEqual({}); + expect(parseCopilotConfig({ enableConfigDiscovery: 1 })).toEqual({}); + }); + + test('parses useLoggedInUser boolean', () => { + expect(parseCopilotConfig({ useLoggedInUser: true })).toEqual({ useLoggedInUser: true }); + expect(parseCopilotConfig({ useLoggedInUser: false })).toEqual({ useLoggedInUser: false }); + }); + + test('parses each valid logLevel enum', () => { + for (const v of ['none', 'error', 'warning', 'info', 'debug', 'all'] as const) { + expect(parseCopilotConfig({ logLevel: v })).toEqual({ logLevel: v }); + } + }); + + test('drops invalid logLevel', () => { + expect(parseCopilotConfig({ logLevel: 'verbose' })).toEqual({}); + expect(parseCopilotConfig({ logLevel: 42 })).toEqual({}); + }); + + test('ignores unknown keys', () => { + expect(parseCopilotConfig({ 
futureField: 'x', model: 'gpt-5' })).toEqual({ + model: 'gpt-5', + }); + }); + + test('does not throw on malformed input', () => { + expect(() => parseCopilotConfig({ model: null })).not.toThrow(); + expect(() => parseCopilotConfig({ modelReasoningEffort: {} })).not.toThrow(); + expect(() => parseCopilotConfig({ logLevel: null })).not.toThrow(); + }); + + test('combines all fields', () => { + expect( + parseCopilotConfig({ + model: 'gpt-5-mini', + modelReasoningEffort: 'high', + copilotCliPath: '/bin/copilot', + configDir: '/etc/copilot', + enableConfigDiscovery: true, + useLoggedInUser: false, + logLevel: 'debug', + }) + ).toEqual({ + model: 'gpt-5-mini', + modelReasoningEffort: 'high', + copilotCliPath: '/bin/copilot', + configDir: '/etc/copilot', + enableConfigDiscovery: true, + useLoggedInUser: false, + logLevel: 'debug', + }); + }); +}); diff --git a/packages/providers/src/community/copilot/config.ts b/packages/providers/src/community/copilot/config.ts new file mode 100644 index 0000000000..26eab163ce --- /dev/null +++ b/packages/providers/src/community/copilot/config.ts @@ -0,0 +1,60 @@ +import type { CopilotProviderDefaults } from '../../types'; + +export type { CopilotProviderDefaults }; + +/** + * Parse raw `assistants.copilot` config into a typed `CopilotProviderDefaults`. + * + * Fallback behavior: fields with unexpected types (or enum values outside the + * declared set) are silently omitted rather than throwing. A broken user + * config must not prevent provider registration or workflow discovery. + * Callers that want strict validation should validate upstream. 
+ */ +export function parseCopilotConfig(raw: Record<string, unknown>): CopilotProviderDefaults { + const config: CopilotProviderDefaults = {}; + + if (typeof raw.model === 'string') { + config.model = raw.model; + } + + if (typeof raw.modelReasoningEffort === 'string') { + const v = raw.modelReasoningEffort; + if (v === 'low' || v === 'medium' || v === 'high' || v === 'xhigh') { + config.modelReasoningEffort = v; + } else if (v === 'max') { + // Accept Archon's workflow-schema alias for the top tier. Normalizing + // at parse time keeps `CopilotProviderDefaults.modelReasoningEffort` + // aligned with the SDK's enum (which has no 'max'). + config.modelReasoningEffort = 'xhigh'; + } + } + + if (typeof raw.copilotCliPath === 'string') { + config.copilotCliPath = raw.copilotCliPath; + } + + if (typeof raw.configDir === 'string') { + config.configDir = raw.configDir; + } + + if (typeof raw.enableConfigDiscovery === 'boolean') { + config.enableConfigDiscovery = raw.enableConfigDiscovery; + } + + if (typeof raw.useLoggedInUser === 'boolean') { + config.useLoggedInUser = raw.useLoggedInUser; + } + + if ( + raw.logLevel === 'none' || + raw.logLevel === 'error' || + raw.logLevel === 'warning' || + raw.logLevel === 'info' || + raw.logLevel === 'debug' || + raw.logLevel === 'all' + ) { + config.logLevel = raw.logLevel; + } + + return config; +} diff --git a/packages/providers/src/community/copilot/event-bridge.test.ts b/packages/providers/src/community/copilot/event-bridge.test.ts new file mode 100644 index 0000000000..0f1ce6f1e7 --- /dev/null +++ b/packages/providers/src/community/copilot/event-bridge.test.ts @@ -0,0 +1,303 @@ +import { describe, expect, mock, test } from 'bun:test'; +import { createMockLogger } from '../../test/mocks/logger'; + +mock.module('@archon/paths', () => ({ + createLogger: mock(() => createMockLogger()), +})); + +import type { SessionEvent } from '@github/copilot-sdk'; + +import type { MessageChunk, TokenUsage } from '../../types'; +import { + AsyncQueue, 
mapCopilotEvent, + normalizeCopilotUsage, + type EventMapperContext, +} from './event-bridge'; + +function makeCtx(): EventMapperContext & { + capturedUsage: TokenUsage | undefined; + erroredWith: string | undefined; +} { + const toolCallIdToName = new Map<string, string>(); + let capturedUsage: TokenUsage | undefined; + let erroredWith: string | undefined; + return { + toolCallIdToName, + captureUsage: (u: TokenUsage): void => { + capturedUsage = u; + }, + markErrored: (msg: string): void => { + erroredWith = msg; + }, + get capturedUsage() { + return capturedUsage; + }, + get erroredWith() { + return erroredWith; + }, + }; +} + +// Helper: construct a minimal SessionEvent with the required shape. We cast +// via unknown because the full SessionEvent union includes many optional +// fields we don't care about in this unit test. +function evt<T extends SessionEvent['type']>(type: T, data: unknown): SessionEvent { + return { + id: 'test-event-id', + timestamp: new Date().toISOString(), + parentId: null, + type, + data, + } as unknown as SessionEvent; +} + +describe('AsyncQueue', () => { + test('delivers items pushed before iteration starts', async () => { + const q = new AsyncQueue<number>(); + q.push(1); + q.push(2); + q.close(); + const out: number[] = []; + for await (const v of q) out.push(v); + expect(out).toEqual([1, 2]); + }); + + test('blocks consumer until item is pushed', async () => { + const q = new AsyncQueue<string>(); + const iter = q[Symbol.asyncIterator](); + const next = iter.next(); + let resolved = false; + void next.then(() => { + resolved = true; + }); + await new Promise(resolve => setTimeout(resolve, 10)); + expect(resolved).toBe(false); + q.push('hello'); + const result = await next; + expect(result).toEqual({ value: 'hello', done: false }); + }); + + test('close() drains pending waiters with done=true', async () => { + const q = new AsyncQueue<number>(); + const iter = q[Symbol.asyncIterator](); + const next = iter.next(); + q.close(); + const result = await next; + expect(result).toEqual({ value: undefined, 
done: true }); + }); + + test('rejects second consumer (single-consumer invariant)', () => { + const q = new AsyncQueue(); + // First iteration — OK. + q[Symbol.asyncIterator](); + // Second iteration — throws synchronously at the call site. + expect(() => q[Symbol.asyncIterator]()).toThrow(/single-consumer/); + }); + + test('push after close is a no-op (does not throw)', () => { + const q = new AsyncQueue(); + q.close(); + expect(() => q.push(1)).not.toThrow(); + }); + + test('close() is idempotent', () => { + const q = new AsyncQueue(); + q.close(); + expect(() => q.close()).not.toThrow(); + }); +}); + +describe('normalizeCopilotUsage', () => { + test('returns undefined when input is undefined', () => { + expect(normalizeCopilotUsage(undefined)).toBeUndefined(); + }); + + test('returns undefined when neither input nor output is numeric', () => { + expect(normalizeCopilotUsage({})).toBeUndefined(); + expect(normalizeCopilotUsage({ inputTokens: 'x' as unknown as number })).toBeUndefined(); + }); + + test('fills missing side with 0 when only one is numeric', () => { + expect(normalizeCopilotUsage({ inputTokens: 100 })).toEqual({ input: 100, output: 0 }); + expect(normalizeCopilotUsage({ outputTokens: 50 })).toEqual({ input: 0, output: 50 }); + }); + + test('maps both input and output when present', () => { + expect(normalizeCopilotUsage({ inputTokens: 100, outputTokens: 42 })).toEqual({ + input: 100, + output: 42, + }); + }); +}); + +describe('mapCopilotEvent', () => { + test('assistant.message_delta → assistant chunk with deltaContent', () => { + const ctx = makeCtx(); + const out = mapCopilotEvent( + evt('assistant.message_delta', { messageId: 'm1', deltaContent: 'Hello ' }), + ctx + ); + expect(out).toEqual([{ type: 'assistant', content: 'Hello ' }]); + }); + + test('assistant.message_delta with empty content is dropped', () => { + const ctx = makeCtx(); + const out = mapCopilotEvent( + evt('assistant.message_delta', { messageId: 'm1', deltaContent: '' }), + ctx 
+ ); + expect(out).toEqual([]); + }); + + test('assistant.reasoning_delta → thinking chunk', () => { + const ctx = makeCtx(); + const out = mapCopilotEvent( + evt('assistant.reasoning_delta', { messageId: 'm1', deltaContent: 'hmm ' }), + ctx + ); + expect(out).toEqual([{ type: 'thinking', content: 'hmm ' }]); + }); + + test('assistant.usage → no chunk, captures usage via callback', () => { + const ctx = makeCtx(); + const out = mapCopilotEvent( + evt('assistant.usage', { model: 'gpt-5', inputTokens: 7, outputTokens: 42 }), + ctx + ); + expect(out).toEqual([]); + expect(ctx.capturedUsage).toEqual({ input: 7, output: 42 }); + }); + + test('tool.execution_start → tool chunk + records name by id', () => { + const ctx = makeCtx(); + const out = mapCopilotEvent( + evt('tool.execution_start', { + toolCallId: 'c1', + toolName: 'bash', + arguments: { cmd: 'ls' }, + }), + ctx + ); + expect(out).toEqual([ + { + type: 'tool', + toolName: 'bash', + toolInput: { cmd: 'ls' }, + toolCallId: 'c1', + }, + ]); + expect(ctx.toolCallIdToName.get('c1')).toBe('bash'); + }); + + test('tool.execution_start without arguments uses empty object', () => { + const ctx = makeCtx(); + const out = mapCopilotEvent( + evt('tool.execution_start', { toolCallId: 'c1', toolName: 'read' }), + ctx + ); + expect((out[0] as { toolInput: unknown }).toolInput).toEqual({}); + }); + + test('tool.execution_complete on success → tool_result chunk with detailedContent', () => { + const ctx = makeCtx(); + ctx.toolCallIdToName.set('c1', 'bash'); + const out = mapCopilotEvent( + evt('tool.execution_complete', { + toolCallId: 'c1', + success: true, + result: { content: 'brief', detailedContent: 'full diff output' }, + }), + ctx + ); + expect(out).toEqual([ + { + type: 'tool_result', + toolName: 'bash', + toolOutput: 'full diff output', + toolCallId: 'c1', + }, + ]); + }); + + test('tool.execution_complete falls back to content when detailedContent absent', () => { + const ctx = makeCtx(); + 
ctx.toolCallIdToName.set('c1', 'read'); + const out = mapCopilotEvent( + evt('tool.execution_complete', { + toolCallId: 'c1', + success: true, + result: { content: 'file contents' }, + }), + ctx + ); + expect((out[0] as { toolOutput: string }).toolOutput).toBe('file contents'); + }); + + test('tool.execution_complete on failure → system warning + tool_result with ❌', () => { + const ctx = makeCtx(); + ctx.toolCallIdToName.set('c1', 'bash'); + const out = mapCopilotEvent( + evt('tool.execution_complete', { + toolCallId: 'c1', + success: false, + result: { content: 'permission denied' }, + }), + ctx + ); + expect(out).toEqual([ + { type: 'system', content: '⚠️ Tool bash failed' }, + { + type: 'tool_result', + toolName: 'bash', + toolOutput: '❌ permission denied', + toolCallId: 'c1', + }, + ]); + }); + + test('tool.execution_complete with unknown toolCallId uses "unknown"', () => { + const ctx = makeCtx(); + const out = mapCopilotEvent( + evt('tool.execution_complete', { + toolCallId: 'missing', + success: true, + result: { content: 'x' }, + }), + ctx + ); + expect((out[0] as { toolName: string }).toolName).toBe('unknown'); + }); + + test('session.error → no chunk emitted, markErrored called (deferred to bridgeSession)', () => { + const ctx = makeCtx(); + const out = mapCopilotEvent( + evt('session.error', { errorType: 'rate_limit', message: 'Slow down' }), + ctx + ); + // Defer the system chunk to bridgeSession so it can suppress the warning + // when SDK auto-recovery still delivers a fallback assistant message. 
+ expect(out).toEqual([]); + expect(ctx.erroredWith).toBe('Slow down'); + }); + + test('session.error with missing message records fallback string', () => { + const ctx = makeCtx(); + const out = mapCopilotEvent(evt('session.error', { errorType: 'unknown' }), ctx); + expect(out).toEqual([]); + expect(ctx.erroredWith).toBe('Copilot session error'); + }); + + test('session.compaction_start → context-compaction system chunk', () => { + const ctx = makeCtx(); + const out = mapCopilotEvent(evt('session.compaction_start', {}), ctx); + expect(out).toEqual([{ type: 'system', content: '⚙️ Compacting context…' }]); + }); + + test('unhandled event types yield no chunks', () => { + const ctx = makeCtx(); + expect(mapCopilotEvent(evt('session.idle', {}), ctx)).toEqual([]); + expect(mapCopilotEvent(evt('assistant.turn_start', { turnId: 't1' }), ctx)).toEqual([]); + expect(mapCopilotEvent(evt('user.message', {}), ctx)).toEqual([]); + }); +}); diff --git a/packages/providers/src/community/copilot/event-bridge.ts b/packages/providers/src/community/copilot/event-bridge.ts new file mode 100644 index 0000000000..db619c2923 --- /dev/null +++ b/packages/providers/src/community/copilot/event-bridge.ts @@ -0,0 +1,434 @@ +/** + * Event bridge between @github/copilot-sdk's callback-based session.on() API + * and Archon's async-generator MessageChunk contract. + * + * Three concerns in this file: + * 1. `AsyncQueue` — single-producer / single-consumer queue; copied + * verbatim from `community/pi/event-bridge.ts`. Peer community providers + * stay decoupled (no cross-imports). + * 2. `mapCopilotEvent(event, ctx)` — pure fn + * translating one SDK event into zero or more MessageChunks. Testable + * in isolation. + * 3. `bridgeSession(session, prompt, abortSignal?)` — wired integration + * wrapper; lives here rather than in provider.ts so the queue/listener/ + * cleanup lifecycle stays readable. 
+ * + * Module-scope invariant: type-only imports from @github/copilot-sdk. Value + * imports go inside `provider.ts` via dynamic `await import(...)`. See the + * PI lazy-load test for rationale. + */ +import { createLogger } from '@archon/paths'; +import type { AssistantMessageEvent, CopilotSession, SessionEvent } from '@github/copilot-sdk'; + +import type { MessageChunk, TokenUsage } from '../../types'; +import { tryParseStructuredOutput } from '../../shared/structured-output'; + +let cachedLog: ReturnType<typeof createLogger> | undefined; +function getLog(): ReturnType<typeof createLogger> { + if (!cachedLog) cachedLog = createLogger('provider.copilot.event-bridge'); + return cachedLog; +} + +// ─── AsyncQueue ────────────────────────────────────────────────────────── + +/** + * Single-producer / single-consumer async queue. Bridges the SDK's + * callback-based `session.on()` into an async generator. + * + * Design: + * - producers call `push(item)` from any synchronous context + * - the consumer awaits `for await (const item of queue)` ONCE + * - sentinel items (in this bridge: `done` / `error`) are pushed by the + * caller; the queue itself does not know about them + * + * Single-consumer is a hard invariant — a second iterator would race with + * the first over both the buffer and the waiters list, silently dropping + * items. Constructor enforces: first `Symbol.asyncIterator` sets + * `consumed=true`; subsequent calls throw loudly during development. + */ +export class AsyncQueue<T> implements AsyncIterable<T> { + private readonly buffer: T[] = []; + private readonly waiters: ((result: IteratorResult<T>) => void)[] = []; + private consumed = false; + private closed = false; + + push(item: T): void { + if (this.closed) return; + const waiter = this.waiters.shift(); + if (waiter) waiter({ value: item, done: false }); + else this.buffer.push(item); + } + + /** + * Terminate iteration cleanly. 
Drains any pending waiters with + * `{ done: true }` so the consumer exits the `for await` loop instead of + * hanging when the producer's finally block fires before a new item + * arrives (e.g. consumer abort mid-iteration). + */ + close(): void { + if (this.closed) return; + this.closed = true; + while (this.waiters.length > 0) { + const waiter = this.waiters.shift(); + if (waiter) waiter({ value: undefined, done: true }); + } + } + + [Symbol.asyncIterator](): AsyncIterator<T> { + if (this.consumed) { + throw new Error( + 'AsyncQueue: a single queue can only be iterated once (single-consumer invariant). Create a new queue for each consumer.' + ); + } + this.consumed = true; + return this.iterate(); + } + + private async *iterate(): AsyncGenerator<T> { + while (true) { + const next = this.buffer.shift(); + if (next !== undefined) { + yield next; + continue; + } + if (this.closed) return; + const result = await new Promise<IteratorResult<T>>(resolve => { + this.waiters.push(resolve); + }); + if (result.done) return; + yield result.value; + } + } +} + +// ─── Usage + event → chunk translation ──────────────────────────────────── + +/** + * Coerce the SDK's `assistant.usage.data` shape into Archon's TokenUsage. + * Returns undefined if neither input nor output token count is a number, + * so callers don't emit a meaningless result chunk with {0, 0}. + */ +export function normalizeCopilotUsage(raw?: { + inputTokens?: number; + outputTokens?: number; +}): TokenUsage | undefined { + if (!raw) return undefined; + const input = raw.inputTokens; + const output = raw.outputTokens; + if (typeof input !== 'number' && typeof output !== 'number') return undefined; + const usage: TokenUsage = { + input: typeof input === 'number' ? input : 0, + output: typeof output === 'number' ? output : 0, + }; + return usage; +} + +/** + * Pure mapper: one SDK event → zero or more MessageChunks, plus side-effect + * callbacks into closure state (toolCallId → toolName map, usage capture). 
+ * + * Splitting side-effects from pure return value lets the test table drive + * the MessageChunk output while spies verify the closure interactions. + * + * Events intentionally NOT mapped: + * - `user.message` — echo of our own prompt + * - `assistant.message` / `assistant.reasoning` — boundary events; + * streaming is covered by `*_delta` events. If deltas were somehow + * absent, `bridgeSession` has a safety-net using sendAndWait's return. + * - `session.idle` — internal signal; sendAndWait resolves on it + * - turn_start/turn_end, streaming_delta, intent, compaction_complete, + * task_complete, context_changed, title_changed, etc. — internal + * housekeeping, no user-facing chunk + */ +export interface EventMapperContext { + /** Populated by tool.execution_start, read by tool.execution_complete. */ + toolCallIdToName: Map<string, string>; + /** Called when assistant.usage arrives; undefined for non-usage events. */ + captureUsage: (usage: TokenUsage) => void; + /** Flagged on session.error; consumer decides whether to promote to isError on the terminal result. */ + markErrored: (errorMsg: string) => void; +} + +/** + * Translate one Copilot SDK `SessionEvent` into zero or more Archon + * `MessageChunk`s, mutating the supplied context (tool-call id → name map, + * captured usage, terminal error) as a side-effect. Keeping the side-effects + * behind a closure lets unit tests drive pure inputs and assert on both the + * returned chunks and the context mutations. 
+ */ +export function mapCopilotEvent(event: SessionEvent, ctx: EventMapperContext): MessageChunk[] { + switch (event.type) { + case 'assistant.message_delta': { + const content = event.data.deltaContent; + if (!content) return []; + return [{ type: 'assistant', content }]; + } + case 'assistant.reasoning_delta': { + const content = event.data.deltaContent; + if (!content) return []; + return [{ type: 'thinking', content }]; + } + case 'assistant.usage': { + const usage = normalizeCopilotUsage(event.data); + if (usage) ctx.captureUsage(usage); + return []; + } + case 'tool.execution_start': { + const { toolCallId, toolName, arguments: args } = event.data; + ctx.toolCallIdToName.set(toolCallId, toolName); + return [ + { + type: 'tool', + toolName, + toolInput: args ?? {}, + toolCallId, + }, + ]; + } + case 'tool.execution_complete': { + const { toolCallId, success, result } = event.data; + const toolName = ctx.toolCallIdToName.get(toolCallId) ?? 'unknown'; + // Prefer detailedContent (full output) over content (truncated for LLM). + const rawOutput = result?.detailedContent ?? result?.content ?? ''; + const chunks: MessageChunk[] = []; + if (!success) { + chunks.push({ + type: 'system', + content: `⚠️ Tool ${toolName} failed`, + }); + } + chunks.push({ + type: 'tool_result', + toolName, + toolOutput: success ? rawOutput : `❌ ${rawOutput}`, + toolCallId, + }); + return chunks; + } + case 'session.error': { + // Don't emit a system chunk here — defer until after sendAndWait + // resolves. If the SDK delivers a fallback assistant message (transient + // upstream errors are common on auto-retry paths), the user got what + // they asked for and a "⚠️ ..." chunk is just noise. The bridgeSession + // wrapper checks `sawAssistantContent` and emits the warning only when + // no assistant content reached the consumer. 
+ const msg = event.data.message || 'Copilot session error'; + ctx.markErrored(msg); + return []; + } + case 'session.compaction_start': { + return [{ type: 'system', content: '⚙️ Compacting context…' }]; + } + default: { + getLog().debug({ eventType: event.type }, 'copilot.unhandled_event_type'); + return []; + } + } +} + +// ─── bridgeSession integration wrapper ──────────────────────────────────── + +/** + * Backstop timeout passed to `session.sendAndWait()`. + * + * The SDK defaults to 60s, which is far too short — any tool-heavy turn or + * workflow node with a larger `idle_timeout` would trip the SDK timer before + * Archon's own idle / abort machinery gets a say. The SDK docs also note + * that this timeout *only* stops the wait; it does not abort in-flight agent + * work — so a small value causes the session to keep running in the + * background, orphaned. We therefore set a 60-minute ceiling (2× Archon's + * `STEP_IDLE_TIMEOUT_MS`) and rely on `abortSignal → session.abort()` to be + * the real cancel path. + */ +const SEND_AND_WAIT_TIMEOUT_MS = 60 * 60 * 1000; + +export type BridgeQueueItem = + | { kind: 'chunk'; chunk: MessageChunk } + | { kind: 'done' } + | { kind: 'error'; error: Error }; + +/** + * Bridge a CopilotSession into an async generator of MessageChunks. + * + * Lifecycle: + * 1. Subscribe to the session's event stream. Each event is translated via + * `mapCopilotEvent` and pushed into an `AsyncQueue`. Listener-thrown + * errors are captured and pushed as `{ kind: 'error' }` so the consumer + * surfaces them instead of swallowing. + * 2. Wire `abortSignal` to `session.abort()`. Fire-and-forget — the SDK + * will surface the resulting rejection through `sendAndWait`, which + * feeds the queue. + * 3. Call `session.sendAndWait({ prompt })` in parallel. Resolution pushes + * `{ kind: 'done' }`; rejection pushes `{ kind: 'error' }`. Its return + * value is stashed as a safety net for the no-streaming-deltas case. + * 4. 
Consume the queue, yielding chunks. On `done`, emit a terminal + * `{ type: 'result', sessionId, tokens?, isError? }` chunk. Tokens are + * captured via the `assistant.usage` event earlier in the stream. + * 5. Finally: close the queue, unsubscribe, remove abort listener, call + * `session.disconnect()` (best-effort), and await the sendAndWait + * promise to let the SDK settle (errors already surfaced via queue). + */ +export async function* bridgeSession( + session: CopilotSession, + prompt: string, + abortSignal?: AbortSignal, + jsonSchema?: Record +): AsyncGenerator { + const log = getLog(); + const queue = new AsyncQueue(); + const toolCallIdToName = new Map(); + let capturedTokens: TokenUsage | undefined; + let errorMessage: string | undefined; + + // Structured-output buffer. Populated only when the caller supplied a + // schema; parsed into the terminal result chunk after the run completes. + const wantsStructured = jsonSchema !== undefined; + let assistantBuffer = ''; + + const ctx: EventMapperContext = { + toolCallIdToName, + captureUsage: (u: TokenUsage): void => { + capturedTokens = u; + }, + markErrored: (msg: string): void => { + errorMessage = msg; + }, + }; + + const unsubscribe = session.on((event: SessionEvent) => { + try { + const chunks = mapCopilotEvent(event, ctx); + for (const chunk of chunks) { + if (wantsStructured && chunk.type === 'assistant') { + assistantBuffer += chunk.content; + } + queue.push({ kind: 'chunk', chunk }); + } + } catch (err) { + queue.push({ kind: 'error', error: err as Error }); + } + }); + + const onAbort = (): void => { + void session.abort().catch(err => { + log.debug({ err, sessionId: session.sessionId }, 'copilot.abort_failed'); + }); + }; + // `addEventListener('abort', ...)` is a no-op on an already-aborted signal, + // so short-circuit before handing the 60-minute sendAndWait path a signal + // that will never fire. Caller's caller (the executor) treats AbortError + // as a clean cancellation. 
Clean up listeners + queue first so the throw + // doesn't leak resources. + if (abortSignal?.aborted) { + onAbort(); + queue.close(); + try { + unsubscribe(); + } catch (err) { + log.debug({ err }, 'copilot.unsubscribe_failed'); + } + try { + await session.disconnect(); + } catch (err) { + log.debug({ err, sessionId: session.sessionId }, 'copilot.disconnect_failed'); + } + throw new DOMException('Copilot sendQuery aborted before start', 'AbortError'); + } + if (abortSignal) { + abortSignal.addEventListener('abort', onAbort, { once: true }); + } + + // Kick off sendAndWait; it resolves on `session.idle`. The explicit + // timeout overrides the SDK's 60s default — see SEND_AND_WAIT_TIMEOUT_MS. + let sendResult: AssistantMessageEvent | undefined; + const sendPromise = session.sendAndWait({ prompt }, SEND_AND_WAIT_TIMEOUT_MS).then( + (r: AssistantMessageEvent | undefined) => { + sendResult = r; + queue.push({ kind: 'done' }); + }, + (err: unknown) => { + queue.push({ kind: 'error', error: err as Error }); + } + ); + + let sawAssistantContent = false; + try { + for await (const item of queue) { + if (item.kind === 'done') break; + if (item.kind === 'error') throw item.error; + if (item.chunk.type === 'assistant') sawAssistantContent = true; + yield item.chunk; + } + + // Safety net: if `streaming: true` didn't produce deltas for some reason + // (older SDK, model quirks, BYOK provider), emit the accumulated final + // content from sendAndWait's return value so the user doesn't lose output. + if (!sawAssistantContent && sendResult?.data?.content) { + if (wantsStructured) assistantBuffer += sendResult.data.content; + yield { type: 'assistant', content: sendResult.data.content }; + sawAssistantContent = true; + } + + // Emit the deferred session.error warning only if no assistant content + // reached the consumer. 
When the SDK auto-recovers and still delivers a + // fallback message (the common case for transient upstream errors), the + // ⚠️ chunk is noise and gets suppressed. + if (!sawAssistantContent && errorMessage) { + yield { type: 'system', content: `⚠️ ${errorMessage}` }; + } + + // Terminal result chunk — always emit, even on error, so the executor + // gets a session ID back (useful for resume). + const result: MessageChunk = { + type: 'result', + sessionId: session.sessionId, + }; + if (capturedTokens) result.tokens = capturedTokens; + if (!sawAssistantContent && errorMessage) { + result.isError = true; + result.errors = [errorMessage]; + } + if (wantsStructured) { + const parsed = tryParseStructuredOutput(assistantBuffer); + if (parsed !== undefined) { + result.structuredOutput = parsed; + } else { + log.warn( + { bufferLength: assistantBuffer.length, sessionId: session.sessionId }, + 'copilot.structured_output_parse_failed' + ); + } + } + yield result; + } finally { + queue.close(); + try { + unsubscribe(); + } catch (err) { + log.debug({ err }, 'copilot.unsubscribe_failed'); + } + if (abortSignal) { + abortSignal.removeEventListener('abort', onAbort); + } + // Abort before disconnect: if the consumer closed the generator early + // (return() / break), sendAndWait is still running in the background. + // Without an explicit abort, the finally would wait on sendPromise for up + // to SEND_AND_WAIT_TIMEOUT_MS. abort() tells the SDK to cancel the run; + // disconnect() tears down the connection. + try { + await session.abort(); + } catch (err) { + log.debug({ err, sessionId: session.sessionId }, 'copilot.abort_cleanup_failed'); + } + try { + await session.disconnect(); + } catch (err) { + log.debug({ err, sessionId: session.sessionId }, 'copilot.disconnect_failed'); + } + // Let the SDK's sendPromise settle so we don't leave a dangling promise. + // Any error was already pushed to the queue. 
+ await sendPromise.catch(() => { + /* already surfaced via queue */ + }); + } +} diff --git a/packages/providers/src/community/copilot/index.ts b/packages/providers/src/community/copilot/index.ts new file mode 100644 index 0000000000..3b6054f901 --- /dev/null +++ b/packages/providers/src/community/copilot/index.ts @@ -0,0 +1,5 @@ +export { COPILOT_CAPABILITIES } from './capabilities'; +export { parseCopilotConfig, type CopilotProviderDefaults } from './config'; +export { resolveCopilotBinaryPath, fileExists } from './binary-resolver'; +export { CopilotProvider, resetCopilotSingleton } from './provider'; +export { registerCopilotProvider } from './registration'; diff --git a/packages/providers/src/community/copilot/provider-hardening.test.ts b/packages/providers/src/community/copilot/provider-hardening.test.ts new file mode 100644 index 0000000000..50662072f4 --- /dev/null +++ b/packages/providers/src/community/copilot/provider-hardening.test.ts @@ -0,0 +1,297 @@ +/** + * Hardening tests for CopilotProvider — defensive behaviors that protect + * against caller-side mistakes and SDK-side cleanup failures. + * + * Covers: + * - early rejection on already-aborted abortSignal (no sendAndWait call) + * - model whitespace trimming (request and assistantConfig fallback) + * - session.error suppression when SDK delivers fallback assistant content + * - disconnect/stop cleanup errors don't mask the primary result/error + * + * Runs in its own bun test invocation — mocks @github/copilot-sdk and + * @archon/paths process-wide. 
+ */ +import { beforeEach, describe, expect, mock, test } from 'bun:test'; +import type { SessionEvent } from '@github/copilot-sdk'; + +import { createMockLogger } from '../../test/mocks/logger'; + +const mockLogger = createMockLogger(); +mock.module('@archon/paths', () => ({ + createLogger: mock(() => mockLogger), + BUNDLED_IS_BINARY: false, + getArchonHome: mock(() => '/tmp/test-archon-home'), +})); + +interface FakeSession { + sessionId: string; + prompt?: string; + aborted: boolean; + disconnected: boolean; + listener: ((event: SessionEvent) => void) | undefined; + fire: (event: SessionEvent) => void; + resolveSend: (result?: unknown) => void; + rejectSend: (err: Error) => void; + setDisconnectImpl: (fn: () => Promise) => void; +} + +function makeFakeSession(sessionId = 'sess-hardening'): FakeSession { + let resolveSend: (v?: unknown) => void = () => undefined; + let rejectSend: (e: Error) => void = () => undefined; + const sendPromise = new Promise((resolve, reject) => { + resolveSend = resolve; + rejectSend = reject; + }); + let disconnectImpl: () => Promise = async () => undefined; + const fake: FakeSession = { + sessionId, + prompt: undefined, + aborted: false, + disconnected: false, + listener: undefined, + fire(event) { + if (this.listener) this.listener(event); + }, + resolveSend(result) { + resolveSend(result); + }, + rejectSend(err) { + rejectSend(err); + }, + setDisconnectImpl(fn) { + disconnectImpl = fn; + }, + }; + const session = fake as FakeSession & { + on: (h: (e: SessionEvent) => void) => () => void; + sendAndWait: (opts: { prompt: string }, timeout?: number) => Promise; + disconnect: () => Promise; + abort: () => Promise; + }; + session.on = (handler): (() => void) => { + fake.listener = handler; + return (): void => { + fake.listener = undefined; + }; + }; + session.sendAndWait = async (opts): Promise => { + fake.prompt = opts.prompt; + sendAndWaitCallCount++; + return sendPromise; + }; + session.disconnect = async (): Promise => { + 
fake.disconnected = true; + await disconnectImpl(); + }; + session.abort = async (): Promise => { + fake.aborted = true; + }; + return session as unknown as FakeSession; +} + +let sendAndWaitCallCount = 0; +let nextCreateSessionResult: FakeSession | Error; +let stopImpl: () => Promise = async () => []; + +const createSessionSpy = mock((_opts: unknown): Promise => { + if (nextCreateSessionResult instanceof Error) { + return Promise.reject(nextCreateSessionResult); + } + return Promise.resolve(nextCreateSessionResult); +}); +const stopSpy = mock(async (): Promise => stopImpl()); + +class FakeCopilotClient { + createSession = createSessionSpy; + resumeSession = mock(async () => { + throw new Error('resumeSession not used in hardening tests'); + }); + stop = stopSpy; + constructor(_opts: Record) {} +} + +const approveAllStub = mock(() => ({ kind: 'approved' })); + +mock.module('@github/copilot-sdk', () => ({ + CopilotClient: FakeCopilotClient, + approveAll: approveAllStub, +})); + +import { CopilotProvider, resetCopilotSingleton } from './provider'; + +function evt(type: T, data: unknown): SessionEvent { + return { + id: 'test', + timestamp: new Date().toISOString(), + parentId: null, + type, + data, + } as unknown as SessionEvent; +} + +async function collect( + generator: AsyncGenerator +): Promise<{ chunks: unknown[]; error?: Error }> { + const chunks: unknown[] = []; + try { + for await (const chunk of generator) chunks.push(chunk); + return { chunks }; + } catch (error) { + return { chunks, error: error as Error }; + } +} + +describe('CopilotProvider hardening', () => { + beforeEach(() => { + resetCopilotSingleton(); + sendAndWaitCallCount = 0; + stopImpl = async (): Promise => []; + createSessionSpy.mockClear(); + stopSpy.mockClear(); + approveAllStub.mockClear(); + }); + + test('rejects early when abortSignal is already aborted', async () => { + const session = makeFakeSession('sess-already-aborted'); + nextCreateSessionResult = session; + + const controller = 
new AbortController(); + controller.abort(); + + const { error } = await collect( + new CopilotProvider().sendQuery('hi', '/repo', undefined, { + model: 'gpt-5', + abortSignal: controller.signal, + }) + ); + + expect(error).toBeDefined(); + expect(error?.name).toBe('AbortError'); + // sendAndWait must NOT have been entered + expect(sendAndWaitCallCount).toBe(0); + }); + + test('trims whitespace from the model before assigning to SessionConfig', async () => { + const session = makeFakeSession('sess-trim-model'); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/repo', undefined, { model: ' gpt-5-mini ' }); + const firstNext = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.fire(evt('assistant.message_delta', { messageId: 'm', deltaContent: 'ok' })); + session.resolveSend(undefined); + await firstNext; + await collect(gen); + + expect(createSessionSpy).toHaveBeenCalledTimes(1); + const opts = createSessionSpy.mock.calls[0]![0] as { model: string }; + expect(opts.model).toBe('gpt-5-mini'); + }); + + test('falls back to assistantConfig.model and trims that too', async () => { + const session = makeFakeSession('sess-fallback-model'); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/repo', undefined, { + assistantConfig: { model: ' gpt-5 ' }, + }); + const firstNext = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.fire(evt('assistant.message_delta', { messageId: 'm', deltaContent: 'ok' })); + session.resolveSend(undefined); + await firstNext; + await collect(gen); + + expect(createSessionSpy).toHaveBeenCalledTimes(1); + const opts = createSessionSpy.mock.calls[0]![0] as { model: string }; + expect(opts.model).toBe('gpt-5'); + }); + + test('does NOT emit a spurious session-error warning when fallback assistant content was delivered', async () => { + const session = 
makeFakeSession('sess-fallback-after-error'); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/repo', undefined, { model: 'gpt-5' }); + const firstNext = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + + // Simulate: session.error fires, then sendAndWait still resolves with a + // fallback final assistant message (the SDK auto-recovered). + session.fire(evt('session.error', { errorType: 'transient', message: 'some transient error' })); + session.resolveSend({ data: { content: 'FALLBACK', messageId: 'final' } }); + + const firstResult = await firstNext; + const { chunks: rest, error } = await collect(gen); + const chunks: unknown[] = []; + if (firstResult.value !== undefined) chunks.push(firstResult.value); + chunks.push(...rest); + + expect(error).toBeUndefined(); + // The fallback content reached the consumer as an assistant chunk — + // either via the safety-net path or the streaming path. + expect(chunks).toContainEqual( + expect.objectContaining({ type: 'assistant', content: 'FALLBACK' }) + ); + // The session-error must NOT produce a system warning when fallback + // content was delivered. 
+ expect(chunks).not.toContainEqual( + expect.objectContaining({ + type: 'system', + content: expect.stringContaining('some transient error'), + }) + ); + }); + + test('cleanup failure in disconnect does not mask the primary result', async () => { + const session = makeFakeSession('sess-disconnect-fails'); + session.setDisconnectImpl(async (): Promise => { + throw new Error('disconnect blew up'); + }); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/repo', undefined, { model: 'gpt-5' }); + const firstNext = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.fire(evt('assistant.message_delta', { messageId: 'm', deltaContent: 'hello' })); + session.resolveSend(undefined); + await firstNext; + const { chunks, error } = await collect(gen); + + expect(error).toBeUndefined(); + expect(chunks).toContainEqual(expect.objectContaining({ type: 'result' })); + }); + + test('cleanup failure in client.stop does not mask the friendly primary error', async () => { + const session = makeFakeSession('sess-stop-fails'); + nextCreateSessionResult = session; + stopImpl = async (): Promise => { + throw new Error('client.stop blew up'); + }; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/repo', undefined, { model: 'gpt-5' }); + const firstNext = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.rejectSend(new Error('Model not available')); + + let primaryError: Error | undefined; + try { + await firstNext; + } catch (e) { + primaryError = e as Error; + } + if (!primaryError) { + // The error may surface from subsequent generator iteration. + const { error } = await collect(gen); + primaryError = error; + } + + // The friendly model-access error must survive the stop() throw. + expect(primaryError?.message).toMatch(/Copilot model access error/i); + expect(primaryError?.message ?? 
'').not.toContain('client.stop blew up'); + }); +}); diff --git a/packages/providers/src/community/copilot/provider-lazy-load.test.ts b/packages/providers/src/community/copilot/provider-lazy-load.test.ts new file mode 100644 index 0000000000..55dc659a8f --- /dev/null +++ b/packages/providers/src/community/copilot/provider-lazy-load.test.ts @@ -0,0 +1,44 @@ +/** + * Regression test: @github/copilot-sdk must not load at module-import time. + * + * The SDK spawns the Copilot CLI subprocess from `new CopilotClient()`, and + * its module graph may evolve in ways that add filesystem reads at import. + * Inside a compiled Archon binary, eager SDK resolution during + * `registerCommunityProviders()` would crash bootstrap before any command + * runs. We defend by doing all SDK value imports inside `sendQuery` / + * `getCopilotClient` via dynamic `await import(...)`. + * + * Detection: replace the SDK with a `mock.module` factory that flips a + * boolean the first time it resolves. Walk the same registration path the + * CLI and server take and assert the flag never tipped. + * + * Runs in its own `bun test` invocation because Bun's `mock.module` is + * process-wide and would interfere with `provider.test.ts`, which installs + * richer SDK stubs (see CLAUDE.md on test isolation). + */ +import { expect, mock, test } from 'bun:test'; + +let copilotSdkLoaded = false; + +mock.module('@github/copilot-sdk', () => { + copilotSdkLoaded = true; + return {}; +}); + +test('registering and instantiating the Copilot provider does not eagerly load the SDK', async () => { + const { clearRegistry, getAgentProvider, registerCommunityProviders } = + await import('../../registry'); + + clearRegistry(); + registerCommunityProviders(); + + const provider = getAgentProvider('copilot'); + expect(provider.getType()).toBe('copilot'); + expect(provider.getCapabilities()).toBeDefined(); + + // If this fails, someone reintroduced a static `import { ... 
} from + // '@github/copilot-sdk'` somewhere in the module chain reachable from + // `registerCommunityProviders()`. Fix by moving that value import inside + // `CopilotProvider.sendQuery()` (or a helper it calls). + expect(copilotSdkLoaded).toBe(false); +}); diff --git a/packages/providers/src/community/copilot/provider.test.ts b/packages/providers/src/community/copilot/provider.test.ts new file mode 100644 index 0000000000..f3040d800f --- /dev/null +++ b/packages/providers/src/community/copilot/provider.test.ts @@ -0,0 +1,547 @@ +/** + * CopilotProvider end-to-end test with a fully mocked @github/copilot-sdk. + * + * Covers: streaming chunks flow through the async generator, resume + * fallback on missing session, abort wiring, unsupported-option log-warn, + * missing-model throw, terminal result chunk carries sessionId + tokens. + * + * Runs in its own bun test invocation — mocks @github/copilot-sdk and + * @archon/paths process-wide. + */ +import { beforeEach, describe, expect, mock, test } from 'bun:test'; +import type { SessionEvent } from '@github/copilot-sdk'; + +import { createMockLogger } from '../../test/mocks/logger'; + +// ─── Mocks ─────────────────────────────────────────────────────────────── + +const mockLogger = createMockLogger(); +mock.module('@archon/paths', () => ({ + createLogger: mock(() => mockLogger), + BUNDLED_IS_BINARY: false, + getArchonHome: mock(() => '/tmp/test-archon-home'), +})); + +// Minimal fake session. Records prompt, exposes the listener so tests can +// fire events synthetically, and resolves sendAndWait when `resolveSend()` is called. 
+interface FakeSession { + sessionId: string; + prompt?: string; + sendTimeout?: number; + aborted: boolean; + disconnected: boolean; + listener: ((event: SessionEvent) => void) | undefined; + fire: (event: SessionEvent) => void; + resolveSend: (result?: unknown) => void; + rejectSend: (err: Error) => void; +} + +function makeFakeSession(sessionId = 'sess-1'): FakeSession { + let resolveSend: (v?: unknown) => void = () => undefined; + let rejectSend: (e: Error) => void = () => undefined; + const sendPromise = new Promise((resolve, reject) => { + resolveSend = resolve; + rejectSend = reject; + }); + const fake: FakeSession = { + sessionId, + prompt: undefined, + aborted: false, + disconnected: false, + listener: undefined, + fire(event) { + if (this.listener) this.listener(event); + }, + resolveSend(result) { + resolveSend(result); + }, + rejectSend(err) { + rejectSend(err); + }, + }; + // Attach the session-shape methods the provider/bridge call: + const session = fake as FakeSession & { + on: (h: (e: SessionEvent) => void) => () => void; + sendAndWait: (opts: { prompt: string }, timeout?: number) => Promise; + disconnect: () => Promise; + abort: () => Promise; + }; + session.on = (handler): (() => void) => { + fake.listener = handler; + return () => { + fake.listener = undefined; + }; + }; + session.sendAndWait = async (opts, timeout): Promise => { + fake.prompt = opts.prompt; + fake.sendTimeout = timeout; + return sendPromise; + }; + session.disconnect = async (): Promise => { + fake.disconnected = true; + }; + session.abort = async (): Promise => { + fake.aborted = true; + }; + return session as unknown as FakeSession; +} + +// Test-controlled fake client. We rebuild it per test via reset(). 
+let nextCreateSessionResult: FakeSession | Error; +let nextResumeSessionResult: FakeSession | Error; +const createSessionSpy = mock((_opts: unknown): Promise => { + if (nextCreateSessionResult instanceof Error) { + return Promise.reject(nextCreateSessionResult); + } + return Promise.resolve(nextCreateSessionResult); +}); +const resumeSessionSpy = mock((_id: string, _opts: unknown): Promise => { + if (nextResumeSessionResult instanceof Error) { + return Promise.reject(nextResumeSessionResult); + } + return Promise.resolve(nextResumeSessionResult); +}); + +let lastClientOpts: Record | undefined; +class FakeCopilotClient { + createSession = createSessionSpy; + resumeSession = resumeSessionSpy; + constructor(opts: Record) { + lastClientOpts = opts; + } +} + +// Capture the onPermissionRequest passed into createSession. +const approveAllStub = mock(() => ({ kind: 'approved' })); + +mock.module('@github/copilot-sdk', () => ({ + CopilotClient: FakeCopilotClient, + approveAll: approveAllStub, +})); + +// Provider imports AFTER mocks are installed. +import { CopilotProvider, resetCopilotSingleton } from './provider'; + +function evt(type: T, data: unknown): SessionEvent { + return { + id: 'test', + timestamp: new Date().toISOString(), + parentId: null, + type, + data, + } as unknown as SessionEvent; +} + +// Drain an async generator (used when the producer feeds events async). 
+async function collect(gen: AsyncGenerator): Promise { + const out: T[] = []; + for await (const x of gen) out.push(x); + return out; +} + +describe('CopilotProvider.getType / getCapabilities', () => { + test('getType returns copilot', () => { + expect(new CopilotProvider().getType()).toBe('copilot'); + }); + + test('getCapabilities matches COPILOT_CAPABILITIES', () => { + const c = new CopilotProvider().getCapabilities(); + expect(c.sessionResume).toBe(true); + expect(c.effortControl).toBe(true); + expect(c.thinkingControl).toBe(true); + expect(c.mcp).toBe(true); + expect(c.hooks).toBe(false); + }); +}); + +describe('CopilotProvider.sendQuery', () => { + beforeEach(() => { + resetCopilotSingleton(); + createSessionSpy.mockClear(); + resumeSessionSpy.mockClear(); + approveAllStub.mockClear(); + lastClientOpts = undefined; + }); + + test('defaults to model="auto" when none is configured', async () => { + const session = makeFakeSession('sess-default-auto'); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/tmp', undefined, { assistantConfig: {} }); + + const firstNext = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.fire(evt('assistant.message_delta', { messageId: 'm', deltaContent: 'hi' })); + session.resolveSend(undefined); + await firstNext; + await collect(gen); + + expect(createSessionSpy).toHaveBeenCalledTimes(1); + const opts = createSessionSpy.mock.calls[0]![0] as { model: string }; + expect(opts.model).toBe('auto'); + }); + + test('passes model + streaming=true + workingDirectory to createSession', async () => { + const session = makeFakeSession('sess-1'); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hello', '/work/dir', undefined, { model: 'gpt-5' }); + + // Drive: start iteration (kicks off createSession); fire a tiny stream + // and resolve sendAndWait so the generator completes. 
+ const firstNext = gen.next(); + // Give the async chain a tick so createSession resolves. + await new Promise(resolve => setTimeout(resolve, 5)); + session.fire(evt('assistant.message_delta', { messageId: 'm', deltaContent: 'hi' })); + session.resolveSend(undefined); + const chunks = [(await firstNext).value, ...(await collect(gen))]; + + expect(createSessionSpy).toHaveBeenCalledTimes(1); + const opts = createSessionSpy.mock.calls[0]![0] as { + model: string; + streaming: boolean; + workingDirectory: string; + }; + expect(opts.model).toBe('gpt-5'); + expect(opts.streaming).toBe(true); + expect(opts.workingDirectory).toBe('/work/dir'); + expect(session.prompt).toBe('hello'); + expect(chunks.some(c => c && typeof c === 'object' && 'type' in c && c.type === 'result')).toBe( + true + ); + }); + + test('reasoningEffort from nodeConfig.effort passes through', async () => { + const session = makeFakeSession(); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/w', undefined, { + model: 'gpt-5', + nodeConfig: { effort: 'high' }, + }); + + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.resolveSend(undefined); + await first; + await collect(gen); + + const opts = createSessionSpy.mock.calls[0]![0] as { reasoningEffort?: string }; + expect(opts.reasoningEffort).toBe('high'); + }); + + test('workflow `effort: max` maps to SDK `xhigh`', async () => { + const session = makeFakeSession(); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/w', undefined, { + model: 'gpt-5', + nodeConfig: { effort: 'max' }, + }); + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.resolveSend(undefined); + await first; + await collect(gen); + + const opts = createSessionSpy.mock.calls[0]![0] as { reasoningEffort?: string }; + expect(opts.reasoningEffort).toBe('xhigh'); + }); + + test('invalid effort 
value is dropped (not passed to SDK)', async () => { + const session = makeFakeSession(); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/w', undefined, { + model: 'gpt-5', + nodeConfig: { effort: 'minimal' }, // Copilot doesn't support + }); + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.resolveSend(undefined); + await first; + await collect(gen); + + const opts = createSessionSpy.mock.calls[0]![0] as { reasoningEffort?: string }; + expect(opts.reasoningEffort).toBeUndefined(); + }); + + test('systemPrompt wraps to systemMessage with append mode', async () => { + const session = makeFakeSession(); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/w', undefined, { + model: 'gpt-5', + systemPrompt: 'Be concise.', + }); + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.resolveSend(undefined); + await first; + await collect(gen); + + const opts = createSessionSpy.mock.calls[0]![0] as { + systemMessage?: { content: string; mode: string }; + }; + expect(opts.systemMessage).toEqual({ content: 'Be concise.', mode: 'append' }); + }); + + test('resume failure falls back to createSession with warning chunk', async () => { + const session = makeFakeSession('sess-new'); + nextResumeSessionResult = new Error('session not found'); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/w', 'sess-missing', { model: 'gpt-5' }); + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.resolveSend(undefined); + const chunks = [(await first).value, ...(await collect(gen))]; + + expect(resumeSessionSpy).toHaveBeenCalledTimes(1); + expect(createSessionSpy).toHaveBeenCalledTimes(1); + const systemChunk = chunks.find( + c => c && typeof c === 'object' && 'type' in c && c.type === 'system' + ) as 
{ content: string } | undefined; + expect(systemChunk?.content).toContain('Could not resume'); + }); + + test('forkSession=true with resumeSessionId creates fresh session (SDK has no fork)', async () => { + const session = makeFakeSession('sess-fresh'); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/w', 'sess-prior', { + model: 'gpt-5', + forkSession: true, + }); + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.resolveSend(undefined); + const chunks = [(await first).value, ...(await collect(gen))]; + + // resumeSession MUST NOT be called — we fork to fresh instead. + expect(resumeSessionSpy).not.toHaveBeenCalled(); + expect(createSessionSpy).toHaveBeenCalledTimes(1); + const systemChunk = chunks.find( + c => c && typeof c === 'object' && 'type' in c && c.type === 'system' + ) as { content: string } | undefined; + expect(systemChunk?.content).toContain('does not support session forking'); + }); + + test('resumeSessionId without forkSession resumes in place (node-to-node continuation)', async () => { + const session = makeFakeSession('sess-resumed'); + nextResumeSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/w', 'sess-prior', { + model: 'gpt-5', + forkSession: false, + }); + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.resolveSend(undefined); + await first; + await collect(gen); + + expect(resumeSessionSpy).toHaveBeenCalledTimes(1); + expect(createSessionSpy).not.toHaveBeenCalled(); + }); + + test('sendAndWait receives explicit timeout > SDK default of 60s', async () => { + const session = makeFakeSession(); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/w', undefined, { model: 'gpt-5' }); + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.resolveSend(undefined); + 
await first; + await collect(gen); + + expect(session.sendTimeout).toBeDefined(); + expect(session.sendTimeout!).toBeGreaterThan(60_000); + }); + + test('terminal result chunk carries sessionId and tokens from usage event', async () => { + const session = makeFakeSession('sess-42'); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/w', undefined, { model: 'gpt-5' }); + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.fire(evt('assistant.usage', { model: 'gpt-5', inputTokens: 10, outputTokens: 3 })); + session.resolveSend(undefined); + const chunks = [(await first).value, ...(await collect(gen))]; + + const result = chunks.find( + c => c && typeof c === 'object' && 'type' in c && c.type === 'result' + ) as { sessionId?: string; tokens?: { input: number; output: number } } | undefined; + expect(result?.sessionId).toBe('sess-42'); + expect(result?.tokens).toEqual({ input: 10, output: 3 }); + }); + + test('abort signal triggers session.abort', async () => { + const session = makeFakeSession(); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const ac = new AbortController(); + const gen = p.sendQuery('hi', '/w', undefined, { + model: 'gpt-5', + abortSignal: ac.signal, + }); + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + ac.abort(); + // Give the abort listener a tick to run session.abort(). 
+ await new Promise(resolve => setTimeout(resolve, 5)); + session.resolveSend(undefined); + await first; + await collect(gen); + + expect(session.aborted).toBe(true); + }); + + test('session.disconnect is called in finally (even on success)', async () => { + const session = makeFakeSession(); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/w', undefined, { model: 'gpt-5' }); + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.resolveSend(undefined); + await first; + await collect(gen); + + expect(session.disconnected).toBe(true); + }); + + test('forkSession + persistSession boolean flags logged at debug (not thrown)', async () => { + const session = makeFakeSession(); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/w', undefined, { + model: 'gpt-5', + persistSession: false, + }); + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.resolveSend(undefined); + await first; + await collect(gen); + + // No throw, and no warn-level log for persistSession — debug is fine. 
+ const warnCalls = mockLogger.warn.mock.calls; + const sawUnsupported = warnCalls.some(args => args[1] === 'copilot.option_not_supported'); + expect(sawUnsupported).toBe(false); + }); + + test('GH_TOKEN is ignored by default (logged-in user wins)', async () => { + const session = makeFakeSession(); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/w', undefined, { + model: 'gpt-5', + env: { GH_TOKEN: 'ghp_testtoken' }, + }); + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.resolveSend(undefined); + await first; + await collect(gen); + + expect(lastClientOpts?.githubToken).toBeUndefined(); + expect(lastClientOpts?.useLoggedInUser).toBe(true); + }); + + test('COPILOT_GITHUB_TOKEN is always used (intent signal)', async () => { + const session = makeFakeSession(); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/w', undefined, { + model: 'gpt-5', + env: { COPILOT_GITHUB_TOKEN: 'ghp_copilot' }, + }); + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.resolveSend(undefined); + await first; + await collect(gen); + + expect(lastClientOpts?.githubToken).toBe('ghp_copilot'); + expect(lastClientOpts?.useLoggedInUser).toBe(false); + }); + + test('useLoggedInUser:false opts into generic GH_TOKEN', async () => { + const session = makeFakeSession(); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/w', undefined, { + model: 'gpt-5', + env: { GH_TOKEN: 'ghp_testtoken' }, + assistantConfig: { useLoggedInUser: false }, + }); + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.resolveSend(undefined); + await first; + await collect(gen); + + expect(lastClientOpts?.githubToken).toBe('ghp_testtoken'); + expect(lastClientOpts?.useLoggedInUser).toBe(false); + }); + + 
test('assistantConfig.useLoggedInUser=true overrides env token', async () => { + const session = makeFakeSession(); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/w', undefined, { + model: 'gpt-5', + env: { GH_TOKEN: 'ghp_testtoken' }, + assistantConfig: { useLoggedInUser: true }, + }); + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.resolveSend(undefined); + await first; + await collect(gen); + + expect(lastClientOpts?.githubToken).toBeUndefined(); + expect(lastClientOpts?.useLoggedInUser).toBe(true); + }); + + test('sendAndWait rejection propagates as thrown error', async () => { + const session = makeFakeSession(); + nextCreateSessionResult = session; + + const p = new CopilotProvider(); + const gen = p.sendQuery('hi', '/w', undefined, { model: 'gpt-5' }); + const first = gen.next(); + await new Promise(resolve => setTimeout(resolve, 5)); + session.rejectSend(new Error('kaboom')); + + await expect( + (async () => { + await first; + for await (const _ of gen) { + /* drain */ + } + })() + ).rejects.toThrow('kaboom'); + }); +}); diff --git a/packages/providers/src/community/copilot/provider.ts b/packages/providers/src/community/copilot/provider.ts new file mode 100644 index 0000000000..de3c019b32 --- /dev/null +++ b/packages/providers/src/community/copilot/provider.ts @@ -0,0 +1,619 @@ +/** + * GitHub Copilot provider (community tier). + * + * Implements `IAgentProvider` on top of @github/copilot-sdk. Resolves auth + + * binary path + reasoning config, translates Archon workflow options + * (tool restrictions, MCP servers, skills, agents, structured output) to the + * SDK's `SessionConfig`, creates or resumes a session, and hands the + * streaming bridge off to `bridgeSession` in `event-bridge.ts`. + * + * Module-scope invariant: type-only imports from @github/copilot-sdk. 
All + * value imports (`CopilotClient`, `approveAll`) happen inside `sendQuery()` + * via dynamic `await import(...)`. `provider-lazy-load.test.ts` asserts this + * so a future SDK update that reads the filesystem at module load can't + * break compiled-binary bootstrap. + */ +import { createLogger } from '@archon/paths'; +import type { + CopilotClientOptions, + CopilotSession, + CustomAgentConfig, + MCPServerConfig, + SessionConfig, + SystemMessageConfig, +} from '@github/copilot-sdk'; + +import type { + IAgentProvider, + MessageChunk, + ProviderCapabilities, + SendQueryOptions, +} from '../../types'; +import { loadMcpConfig } from '../../claude/provider'; +import { resolveSkillDirectories } from '../../shared/skills'; +import { augmentPromptForJsonSchema } from '../../shared/structured-output'; +import { COPILOT_CAPABILITIES } from './capabilities'; +import { parseCopilotConfig, type CopilotProviderDefaults } from './config'; +import { resolveCopilotBinaryPath } from './binary-resolver'; +import { bridgeSession } from './event-bridge'; + +// `ReasoningEffort` is defined in the SDK but not re-exported from its barrel +// (as of @github/copilot-sdk@0.2.2). Mirror the enum literally so we don't +// depend on an internal subpath. +type CopilotReasoningEffort = 'low' | 'medium' | 'high' | 'xhigh'; + +/** + * Auth env vars, split by intent. + * + * - `COPILOT_GITHUB_TOKEN` — Copilot-specific PAT. Setting it is a strong + * signal of intent ("use this for Copilot"), so it always wins. + * - `GH_TOKEN` / `GITHUB_TOKEN` — generic GitHub tokens. Most users have + * these set for `gh` CLI / clone helpers / webhooks, where classic PATs + * are fine. Those PATs typically lack Copilot entitlement, so picking + * them up automatically yields a misleading "Session was not created + * with authentication info" error from the SDK. We therefore ignore + * these unless the user explicitly opts in via `useLoggedInUser: false`. 
+let cachedLog: ReturnType<typeof createLogger> | undefined;
+function getLog(): ReturnType<typeof createLogger> {
+  if (!cachedLog) cachedLog = createLogger('provider.copilot');
+  return cachedLog;
+}
+
+/**
+ * No-op kept for back-compat with tests that previously called into the
+ * singleton-reset API. The client is now constructed fresh per `sendQuery()`
+ * so each request sees correct per-request env vars.
+ */
+export function resetCopilotSingleton(): void {
+  // no-op
+}
+
+// ─── Warning collection ─────────────────────────────────────────────────────
+
+/** Structured provider warning collected during translation; flushed as a system chunk. */
+interface ProviderWarning {
+  code: string;
+  message: string;
+}
+
+// ─── Env + auth ─────────────────────────────────────────────────────────────
+
+/**
+ * Merge process.env with per-request env vars from the workflow node's
+ * codebase-scoped env bag. Request env wins — matches the layering
+ * Claude/Codex use for their SDK env handoff.
+ */
+function buildCopilotEnv(requestEnv?: Record<string, string>): Record<string, string> {
+  const baseEnv = Object.fromEntries(
+    Object.entries(process.env).filter((entry): entry is [string, string] => entry[1] !== undefined)
+  );
+  return { ...baseEnv, ...(requestEnv ?? {}) };
+}
+
+function resolveCopilotToken(env: Record<string, string>): string | undefined {
+function resolveGenericGitHubToken(env: Record<string, string>): string | undefined {
Use `effort: low|medium|high|max` instead.', + }; + } + + if (typeof rawThinking === 'string' || typeof rawEffort === 'string') { + const offender = typeof rawThinking === 'string' ? rawThinking : rawEffort; + return { + effort: undefined, + warning: `Copilot ignored unknown reasoning level '${String(offender)}'. Valid: low, medium, high, xhigh, max, off.`, + }; + } + + // Fall back to config-level default when nodeConfig provides nothing actionable. + return { effort: copilotConfig.modelReasoningEffort }; +} + +// ─── System prompt ────────────────────────────────────────────────────────── + +function resolveSystemMessage(requestOptions?: SendQueryOptions): SystemMessageConfig | undefined { + const requestPrompt = requestOptions?.systemPrompt; + const nodePrompt = + typeof requestOptions?.nodeConfig?.systemPrompt === 'string' + ? requestOptions.nodeConfig.systemPrompt + : undefined; + const content = requestPrompt ?? nodePrompt; + if (typeof content === 'string' && content.length > 0) { + return { mode: 'append', content }; + } + return undefined; +} + +// ─── Translations ─────────────────────────────────────────────────────────── + +/** + * Translate Archon's per-node `allowed_tools` / `denied_tools` to Copilot's + * `availableTools` / `excludedTools`. Copilot's spec: `availableTools` takes + * precedence over `excludedTools`; we pass both through when present and let + * the SDK enforce precedence. + */ +function applyToolRestrictions( + sessionConfig: SessionConfig, + nodeConfig: SendQueryOptions['nodeConfig'] +): void { + if (!nodeConfig) return; + if (nodeConfig.allowed_tools !== undefined) { + sessionConfig.availableTools = nodeConfig.allowed_tools; + } + if (nodeConfig.denied_tools !== undefined) { + sessionConfig.excludedTools = nodeConfig.denied_tools; + } +} + +/** + * Translate Archon's `nodeConfig.mcp` (JSON-file path) to Copilot's + * `SessionConfig.mcpServers`. 
+async function applyMcpServers(
+  sessionConfig: SessionConfig,
+  nodeConfig: SendQueryOptions['nodeConfig'],
+  cwd: string,
+  warnings: ProviderWarning[]
+): Promise<void> {
+  const mcpPath = nodeConfig?.mcp;
+  if (typeof mcpPath !== 'string' || mcpPath.length === 0) return;
+
+  const { servers, serverNames, missingVars } = await loadMcpConfig(mcpPath, cwd);
+
+  if (missingVars.length > 0) {
+    warnings.push({
+      code: 'copilot.mcp_env_vars_missing',
+      message: `Copilot MCP config references undefined env vars: ${missingVars.join(', ')}. Servers using them may fail at runtime.`,
+    });
+  }
+
+  sessionConfig.mcpServers = servers as Record<string, MCPServerConfig>;
+  getLog().info({ serverNames, missingVars }, 'copilot.mcp_loaded');
+}
+ * Archon agent fields Copilot cannot represent (`model`, `disallowedTools`, + * `skills`, `maxTurns`) surface as one consolidated warning per agent. + * + * We do NOT set `SessionConfig.agent` — Archon's workflow model invokes + * sub-agents via the Task tool, not by switching active agent at session + * start. + */ +function applyAgents( + sessionConfig: SessionConfig, + nodeConfig: SendQueryOptions['nodeConfig'], + warnings: ProviderWarning[] +): void { + const agents = nodeConfig?.agents; + if (!agents) return; + const entries = Object.entries(agents); + if (entries.length === 0) return; + + const customAgents: CustomAgentConfig[] = entries.map(([name, def]) => { + const ignored: string[] = []; + if (def.model !== undefined) ignored.push('model'); + if (def.disallowedTools !== undefined) ignored.push('disallowedTools'); + if (def.skills !== undefined) ignored.push('skills'); + if (def.maxTurns !== undefined) ignored.push('maxTurns'); + + if (ignored.length > 0) { + warnings.push({ + code: 'copilot.agent_fields_ignored', + message: `Copilot agent '${name}' ignored unsupported fields: ${ignored.join(', ')}. Copilot supports description, prompt, tools (allowlist) only.`, + }); + } + + return { + name, + description: def.description, + prompt: def.prompt, + ...(def.tools !== undefined ? { tools: def.tools } : {}), + }; + }); + + sessionConfig.customAgents = customAgents; + getLog().info( + { count: customAgents.length, names: customAgents.map(a => a.name) }, + 'copilot.agents_registered' + ); +} + +// ─── SessionConfig assembly ───────────────────────────────────────────────── + +/** + * Single construction site for the Copilot SessionConfig. Callers add new + * translations as `applyX(sessionConfig, ..., warnings)` calls below — keep + * business logic here straight-through. 
+async function buildSessionConfig(
+  copilotConfig: CopilotProviderDefaults,
+  requestOptions: SendQueryOptions | undefined,
+  cwd: string,
+  approveAll: SessionConfig['onPermissionRequest'],
+  warnings: ProviderWarning[]
+): Promise<SessionConfig> {
*/ +function safeErrorString(value: unknown): string { + if (value === undefined || value === null) return 'Unknown error'; + if (typeof value === 'string') return value || 'Unknown error'; + if (typeof value === 'number' || typeof value === 'boolean') return String(value); + try { + const json = JSON.stringify(value); + if (json && json !== '{}') return json; + } catch { + /* fall through */ + } + return 'Unknown error'; +} + +function isModelAccessError(errorMessage: string): boolean { + const normalized = errorMessage.toLowerCase(); + const hasModel = normalized.includes('model'); + const hasAvailabilitySignal = + normalized.includes('not available') || + normalized.includes('not found') || + normalized.includes('unsupported'); + return hasModel && hasAvailabilitySignal; +} + +/** + * Classify common Copilot failure modes and return a more actionable Error. + * Combines the thrown message with any `lastSessionError` collected via the + * SDK's `session.error` event — the latter often carries the specific + * model-access / auth detail while the thrown error is generic. + */ +function buildFriendlyCopilotError(error: unknown, lastSessionError?: string): Error { + const thrownMessage = + error instanceof Error && error.message ? error.message : safeErrorString(error); + const parts = [thrownMessage, lastSessionError].filter( + (m): m is string => typeof m === 'string' && m.length > 0 + ); + const combined = parts.join('\n'); + + if (isModelAccessError(combined)) { + return new Error( + `Copilot model access error: ${combined}\n\n` + + 'Try a different model in the workflow node or set assistants.copilot.model in .archon/config.yaml.' 
+  async *sendQuery(
+    prompt: string,
+    cwd: string,
+    resumeSessionId?: string,
+    requestOptions?: SendQueryOptions
+  ): AsyncGenerator<MessageChunk> {
+ if (requestOptions?.forkSession !== undefined) { + log.debug( + { option: 'forkSession', value: requestOptions.forkSession }, + 'copilot.option_not_supported' + ); + } + if (requestOptions?.persistSession !== undefined) { + log.debug( + { option: 'persistSession', value: requestOptions.persistSession }, + 'copilot.option_not_supported' + ); + } + + const assistantConfig = requestOptions?.assistantConfig ?? {}; + const copilotConfig = parseCopilotConfig(assistantConfig); + + const mergedEnv = buildCopilotEnv(requestOptions?.env); + const copilotToken = resolveCopilotToken(mergedEnv); + const genericGithubToken = resolveGenericGitHubToken(mergedEnv); + const cliPath = await resolveCopilotBinaryPath(copilotConfig.copilotCliPath); + + const sdk = await import('@github/copilot-sdk'); + const { CopilotClient: copilotClientCtor, approveAll } = sdk; + + const warnings: ProviderWarning[] = []; + const sessionConfig = await buildSessionConfig( + copilotConfig, + requestOptions, + cwd, + approveAll, + warnings + ); + + // Flush translation warnings before session creation so the user sees + // them even if session construction fails. + for (const w of warnings) { + yield { type: 'system', content: `⚠️ ${w.message}` }; + } + + // Best-effort structured output: Copilot has no native JSON-mode, so we + // augment the prompt with the schema. bridgeSession parses the + // accumulated assistant transcript and attaches `structuredOutput` to + // the terminal result chunk. + const outputFormat = requestOptions?.outputFormat; + const wantsStructured = outputFormat?.type === 'json_schema'; + const effectivePrompt = wantsStructured + ? augmentPromptForJsonSchema(prompt, outputFormat.schema) + : prompt; + + const clientOpts: CopilotClientOptions = { + cwd, + env: mergedEnv, + }; + if (cliPath) clientOpts.cliPath = cliPath; + // Auth precedence: see COPILOT_TOKEN_ENV_KEY / GENERIC_GITHUB_TOKEN_ENV_KEYS docs. 
+ let tokenSource: 'copilot-token' | 'generic-token' | 'logged-in-user'; + if (copilotToken) { + clientOpts.githubToken = copilotToken; + clientOpts.useLoggedInUser = false; + tokenSource = 'copilot-token'; + } else if (copilotConfig.useLoggedInUser === false) { + if (genericGithubToken) { + clientOpts.githubToken = genericGithubToken; + tokenSource = 'generic-token'; + } else { + tokenSource = 'logged-in-user'; + } + clientOpts.useLoggedInUser = false; + } else { + clientOpts.useLoggedInUser = true; + tokenSource = 'logged-in-user'; + } + if (copilotConfig.logLevel) clientOpts.logLevel = copilotConfig.logLevel; + const client = new copilotClientCtor(clientOpts); + + let session: CopilotSession; + let resumeFailed = false; + let forkedToFresh = false; + // Archon's dag-executor sets `forkSession: true` on every reuse so retries + // start from the pre-node conversation state. The Copilot SDK has no fork + // API — resumeSession mutates the source session in place. When fork is + // requested we therefore create a fresh session rather than pollute the + // source with retry attempts. That loses the prior conversation context, + // but preserves retry correctness (which is what the executor cares about). 
+ const wantsFork = requestOptions?.forkSession === true; + try { + if (resumeSessionId && !wantsFork) { + log.debug({ sessionId: resumeSessionId, cwd }, 'copilot.resume_attempt'); + try { + session = await client.resumeSession(resumeSessionId, sessionConfig); + } catch (err) { + log.debug( + { err, sessionId: resumeSessionId }, + 'copilot.resume_failed_falling_back_to_create' + ); + resumeFailed = true; + session = await client.createSession(sessionConfig); + } + } else { + if (resumeSessionId && wantsFork) { + log.warn( + { requestedResumeSessionId: resumeSessionId }, + 'copilot.fork_unsupported_creating_fresh_session' + ); + forkedToFresh = true; + } else { + log.debug({ cwd }, 'copilot.create_session'); + } + session = await client.createSession(sessionConfig); + } + } catch (err) { + // Can't connect / create — surface a friendly error and stop the client. + try { + await client.stop(); + } catch (stopErr) { + log.debug({ err: stopErr }, 'copilot.client_stop_failed_after_session_error'); + } + throw buildFriendlyCopilotError(err); + } + + if (resumeFailed) { + yield { + type: 'system', + content: '⚠️ Could not resume Copilot session — starting a fresh conversation.', + }; + } else if (forkedToFresh) { + yield { + type: 'system', + content: + '⚠️ Copilot SDK does not support session forking; starting a fresh conversation to keep retries safe.', + }; + } + + log.info( + { + sessionId: session.sessionId, + model: sessionConfig.model, + cwd, + reasoningEffort: sessionConfig.reasoningEffort, + hasSystemMessage: sessionConfig.systemMessage !== undefined, + mcpServers: sessionConfig.mcpServers ? Object.keys(sessionConfig.mcpServers).length : 0, + skills: sessionConfig.skillDirectories?.length ?? 0, + agents: sessionConfig.customAgents?.length ?? 
0, + tokenSource, + resumed: resumeSessionId !== undefined && !resumeFailed, + }, + 'copilot.session_started' + ); + + try { + yield* bridgeSession( + session, + effectivePrompt, + requestOptions?.abortSignal, + wantsStructured ? outputFormat.schema : undefined + ); + log.info({ sessionId: session.sessionId }, 'copilot.prompt_completed'); + } catch (err) { + log.error({ err, sessionId: session.sessionId }, 'copilot.prompt_failed'); + throw buildFriendlyCopilotError(err); + } finally { + // Stop the client so its CLI subprocess shuts down; bridgeSession already + // handled session.abort() + session.disconnect() in its own finally. + try { + const stopErrors = await client.stop(); + if (stopErrors.length > 0) { + log.warn({ errors: stopErrors.map(e => e.message) }, 'copilot.client_stop_errors'); + } + } catch (stopErr) { + log.debug({ err: stopErr }, 'copilot.client_stop_threw'); + } + } + } +} diff --git a/packages/providers/src/community/copilot/registration.ts b/packages/providers/src/community/copilot/registration.ts new file mode 100644 index 0000000000..9db14c9bf2 --- /dev/null +++ b/packages/providers/src/community/copilot/registration.ts @@ -0,0 +1,24 @@ +import { isRegisteredProvider, registerProvider } from '../../registry'; + +import { COPILOT_CAPABILITIES } from './capabilities'; +import { CopilotProvider } from './provider'; + +/** + * Register the GitHub Copilot community provider. + * + * Idempotent — safe to call multiple times, so process entrypoints (CLI, + * server, config-loader) can each call it without coordination. Kept + * separate from `registerBuiltinProviders()` because `builtIn: false` is + * load-bearing: Copilot is a community provider and must not be conflated + * with core providers until it's explicitly promoted. 
+ */ +export function registerCopilotProvider(): void { + if (isRegisteredProvider('copilot')) return; + registerProvider({ + id: 'copilot', + displayName: 'Copilot (GitHub)', + factory: () => new CopilotProvider(), + capabilities: COPILOT_CAPABILITIES, + builtIn: false, + }); +} diff --git a/packages/providers/src/community/pi/event-bridge.test.ts b/packages/providers/src/community/pi/event-bridge.test.ts index d0bf9a35b7..f9797488ce 100644 --- a/packages/providers/src/community/pi/event-bridge.test.ts +++ b/packages/providers/src/community/pi/event-bridge.test.ts @@ -401,6 +401,15 @@ describe('tryParseStructuredOutput', () => { expect(tryParseStructuredOutput(' ')).toBeUndefined(); }); + test('returns undefined for valid JSON that is not an object', () => { + // Schema augmentation always asks for an object — bare primitives are + // valid JSON but not "structured output". + expect(tryParseStructuredOutput('null')).toBeUndefined(); + expect(tryParseStructuredOutput('42')).toBeUndefined(); + expect(tryParseStructuredOutput('"answer"')).toBeUndefined(); + expect(tryParseStructuredOutput('true')).toBeUndefined(); + }); + test('returns undefined when model wraps JSON in prose with trailing text', () => { // Caller degrades via the executor's missing-structured-output warning. // Forward scan starts at the JSON object but JSON.parse rejects the diff --git a/packages/providers/src/community/pi/event-bridge.ts b/packages/providers/src/community/pi/event-bridge.ts index 4adde52809..3a6c3973fd 100644 --- a/packages/providers/src/community/pi/event-bridge.ts +++ b/packages/providers/src/community/pi/event-bridge.ts @@ -151,55 +151,11 @@ export function buildResultChunk(messages: readonly unknown[]): MessageChunk { return chunk; } -/** - * Attempt to parse a Pi assistant transcript as the structured-output JSON - * requested via `outputFormat`. 
Handles three common model failure modes: - * - trailing/leading whitespace (always stripped) - * - markdown code fences (```json ... ``` or bare ``` ... ```) that models - * emit despite the "no code fences" instruction in the prompt - * - prose preamble followed by a single trailing JSON object — pattern - * observed on Minimax M2.7 ("Now I have all the inputs. Let me evaluate - * the three gates: ... {...}"). Reasoning models tend to "think out loud" - * before emitting structured output despite explicit JSON-only prompts. - * - * Returns the parsed value on success, `undefined` on any failure. Callers - * treat `undefined` as "structured output unavailable" and degrade via the - * dag-executor's existing missing-structured-output warning. - */ -export function tryParseStructuredOutput(text: string): unknown { - const trimmed = text.trim(); - if (trimmed.length === 0) return undefined; - // Strip ```json / ``` fences if present. Match only at boundaries so we - // don't mangle JSON strings that legitimately contain backticks. - const cleaned = trimmed - .replace(/^```(?:json)?\s*\n?/i, '') - .replace(/\n?\s*```\s*$/, '') - .trim(); - - // Tier 1: clean parse — fast path for fully compliant outputs. - try { - return JSON.parse(cleaned); - } catch { - // fall through - } - - // Tier 2: scan forward to the FIRST `{` and parse from there. Recovers the - // preamble-then-JSON pattern reasoning models emit. A backward scan from - // the last `{` was considered but rejected: it silently returns the wrong - // object when the prose contains a brace-bearing example after the real - // payload (e.g. `{"actual":1}\nFor example: {"x":2}` would yield `{x:2}`), - // breaking the conservative-failure contract callers rely on. - const firstBrace = cleaned.indexOf('{'); - if (firstBrace > 0) { - try { - return JSON.parse(cleaned.slice(firstBrace)); - } catch { - // fall through - } - } - - return undefined; -} +// Structured-output parsing is shared across providers. 
Import once for local +// use and re-export so existing callers and tests keep their import path +// stable; new providers should import from `../../shared/structured-output`. +export { tryParseStructuredOutput } from '../../shared/structured-output'; +import { tryParseStructuredOutput } from '../../shared/structured-output'; /** * Pure mapper from Pi's `AgentSessionEvent` → zero-or-more Archon `MessageChunk`s. diff --git a/packages/providers/src/community/pi/options-translator.ts b/packages/providers/src/community/pi/options-translator.ts index d970985f4e..1e26581961 100644 --- a/packages/providers/src/community/pi/options-translator.ts +++ b/packages/providers/src/community/pi/options-translator.ts @@ -1,7 +1,3 @@ -import { existsSync } from 'node:fs'; -import { homedir } from 'node:os'; -import { join } from 'node:path'; - import { codingTools, createBashTool, @@ -249,79 +245,8 @@ export function resolvePiTools( // ─── Skills ──────────────────────────────────────────────────────────────── -export interface ResolvedSkills { - /** Absolute paths to resolved skill directories. Each contains a SKILL.md. */ - paths: string[]; - /** Skill names that couldn't be resolved in any search location. */ - missing: string[]; -} - -/** - * Pi's skill-discovery search order for a named skill. Mirrors the locations - * Claude's SDK and Pi's default resource loader both respect, so Archon - * workflows that already work under Claude find the same skills under Pi. - * - * Order (first match wins per name): - * 1. `/.agents/skills//` — project-local, agentskills.io standard - * 2. `/.claude/skills//` — project-local, Claude convention - * 3. `~/.agents/skills//` — user-global, agentskills.io standard - * 4. `~/.claude/skills//` — user-global, Claude convention - * - * Ancestor traversal above cwd is deliberately not done in v2 — matches the - * Pi provider's cwd-bound scope and avoids ambiguity about which repo's - * skills win when Archon runs out of a subdirectory. 
- */ -function skillSearchRoots(cwd: string): string[] { - // Prefer `HOME` env var when set — Bun's os.homedir() bypasses `HOME` and - // reads from the system uid lookup, which is correct in production but - // makes tests using staged temp homes impossible. The fallback to - // homedir() keeps behavior identical in non-test contexts. - const home = process.env.HOME ?? homedir(); - return [ - join(cwd, '.agents', 'skills'), - join(cwd, '.claude', 'skills'), - join(home, '.agents', 'skills'), - join(home, '.claude', 'skills'), - ]; -} - -/** - * Resolve Archon's name-based `skills:` nodeConfig references to absolute - * directory paths Pi's resource loader can consume via `additionalSkillPaths`. - * - * Each named skill is expected to be a directory containing a `SKILL.md` - * file — the agentskills.io standard layout. - */ -export function resolvePiSkills(cwd: string, skillNames: string[] | undefined): ResolvedSkills { - if (!skillNames || skillNames.length === 0) { - return { paths: [], missing: [] }; - } - - const roots = skillSearchRoots(cwd); - const paths: string[] = []; - const missing: string[] = []; - const seen = new Set(); - - for (const rawName of skillNames) { - if (typeof rawName !== 'string' || rawName.length === 0) continue; - if (seen.has(rawName)) continue; - seen.add(rawName); - - let found: string | undefined; - for (const root of roots) { - const candidate = join(root, rawName); - if (existsSync(join(candidate, 'SKILL.md'))) { - found = candidate; - break; - } - } - - if (found) { - paths.push(found); - } else { - missing.push(rawName); - } - } - - return { paths, missing }; -} +// Skill resolution is shared across providers. Re-export `resolvePiSkills` as +// an alias of the shared `resolveSkillDirectories` so existing Pi callers and +// tests keep their import path stable. 
+export { resolveSkillDirectories as resolvePiSkills } from '../../shared/skills'; +export type { ResolvedSkills } from '../../shared/skills'; diff --git a/packages/providers/src/community/pi/provider.ts b/packages/providers/src/community/pi/provider.ts index 5a14ed6166..c240747bf1 100644 --- a/packages/providers/src/community/pi/provider.ts +++ b/packages/providers/src/community/pi/provider.ts @@ -94,29 +94,11 @@ function getLog(): ReturnType { return cachedLog; } -/** - * Append a "respond with JSON matching this schema" instruction to the user - * prompt so Pi-backed models produce parseable structured output. Pi's SDK - * has no JSON-mode equivalent to Claude's outputFormat or Codex's - * outputSchema, so this is a best-effort fallback: the event bridge parses - * the assistant transcript on agent_end. Models that reliably follow - * instruction (GPT-5, Claude, Gemini 2.x, recent Qwen Coder, DeepSeek V3) - * return clean JSON; models that don't produce a parse failure, which the - * executor surfaces via the existing dag.structured_output_missing warning. - */ -export function augmentPromptForJsonSchema( - prompt: string, - schema: Record -): string { - return `${prompt} - ---- - -CRITICAL: Respond with ONLY a JSON object matching the schema below. No prose before or after the JSON. No markdown code fences. Just the raw JSON object as your final message. - -Schema: -${JSON.stringify(schema, null, 2)}`; -} +// Structured-output prompt augmentation is shared across providers. Import +// once for local use and re-export so existing callers and tests keep their +// import path stable; new providers should import from `../../shared/structured-output`. 
+import { augmentPromptForJsonSchema } from '../../shared/structured-output'; +export { augmentPromptForJsonSchema }; /** * Pi community provider — wraps `@mariozechner/pi-coding-agent`'s full diff --git a/packages/providers/src/index.ts b/packages/providers/src/index.ts index d430f8d402..3cd4912d79 100644 --- a/packages/providers/src/index.ts +++ b/packages/providers/src/index.ts @@ -53,3 +53,15 @@ export { registerPiProvider, type PiProviderDefaults, } from './community/pi'; + +export { + CopilotProvider, + parseCopilotConfig, + registerCopilotProvider, + resetCopilotSingleton, + type CopilotProviderDefaults, +} from './community/copilot'; +export { + resolveCopilotBinaryPath, + fileExists as copilotFileExists, +} from './community/copilot/binary-resolver'; diff --git a/packages/providers/src/registry.test.ts b/packages/providers/src/registry.test.ts index ee3e04ee04..29083481c7 100644 --- a/packages/providers/src/registry.test.ts +++ b/packages/providers/src/registry.test.ts @@ -12,6 +12,7 @@ import { clearRegistry, } from './registry'; import { registerPiProvider } from './community/pi/registration'; +import { registerCopilotProvider } from './community/copilot/registration'; import { UnknownProviderError } from './errors'; import type { ProviderRegistration, IAgentProvider, ProviderCapabilities } from './types'; @@ -252,16 +253,17 @@ describe('registry', () => { describe('registerCommunityProviders (aggregator)', () => { test('registers all bundled community providers', () => { registerCommunityProviders(); - // Pi is currently the only community provider bundled. When more are - // added, they should appear here automatically. 
expect(isRegisteredProvider('pi')).toBe(true); + expect(isRegisteredProvider('copilot')).toBe(true); }); test('is idempotent', () => { registerCommunityProviders(); expect(() => registerCommunityProviders()).not.toThrow(); const piCount = getRegisteredProviders().filter(p => p.id === 'pi').length; + const copilotCount = getRegisteredProviders().filter(p => p.id === 'copilot').length; expect(piCount).toBe(1); + expect(copilotCount).toBe(1); }); }); @@ -318,4 +320,53 @@ describe('registry', () => { expect(ids).toEqual(['claude', 'codex', 'pi']); }); }); + + describe('registerCopilotProvider (community provider)', () => { + test('registers copilot with builtIn: false', () => { + registerCopilotProvider(); + const reg = getRegistration('copilot'); + expect(reg.id).toBe('copilot'); + expect(reg.displayName).toBe('Copilot (GitHub)'); + expect(reg.builtIn).toBe(false); + }); + + test('is idempotent', () => { + registerCopilotProvider(); + expect(() => registerCopilotProvider()).not.toThrow(); + const entries = getRegisteredProviders().filter(p => p.id === 'copilot'); + expect(entries).toHaveLength(1); + }); + + test('declares conservative capabilities', () => { + registerCopilotProvider(); + const caps = getProviderCapabilities('copilot'); + expect(caps.sessionResume).toBe(true); + expect(caps.envInjection).toBe(true); + expect(caps.effortControl).toBe(true); + expect(caps.thinkingControl).toBe(true); + expect(caps.mcp).toBe(true); + expect(caps.hooks).toBe(false); + expect(caps.skills).toBe(true); + expect(caps.toolRestrictions).toBe(true); + expect(caps.structuredOutput).toBe(true); + expect(caps.agents).toBe(true); + expect(caps.fallbackModel).toBe(false); + expect(caps.sandbox).toBe(false); + }); + + test('appears in getProviderInfoList with builtIn: false', () => { + registerCopilotProvider(); + const info = getProviderInfoList().find(p => p.id === 'copilot'); + expect(info).toBeDefined(); + expect(info?.builtIn).toBe(false); + }); + + test('does not collide with 
built-ins', () => { + registerCopilotProvider(); + const ids = getRegisteredProviders() + .map(p => p.id) + .sort(); + expect(ids).toEqual(['claude', 'codex', 'copilot']); + }); + }); }); diff --git a/packages/providers/src/registry.ts b/packages/providers/src/registry.ts index 7006ab4961..c92efb780b 100644 --- a/packages/providers/src/registry.ts +++ b/packages/providers/src/registry.ts @@ -17,6 +17,7 @@ import { ClaudeProvider } from './claude/provider'; import { CodexProvider } from './codex/provider'; import { CLAUDE_CAPABILITIES } from './claude/capabilities'; import { CODEX_CAPABILITIES } from './codex/capabilities'; +import { registerCopilotProvider } from './community/copilot/registration'; import { registerPiProvider } from './community/pi/registration'; import { UnknownProviderError } from './errors'; import { createLogger } from '@archon/paths'; @@ -153,6 +154,7 @@ export function registerBuiltinProviders(): void { */ export function registerCommunityProviders(): void { registerPiProvider(); + registerCopilotProvider(); } /** @internal Test-only — clears the registry. Not for production use. */ diff --git a/packages/providers/src/shared/skills.ts b/packages/providers/src/shared/skills.ts new file mode 100644 index 0000000000..32bec70270 --- /dev/null +++ b/packages/providers/src/shared/skills.ts @@ -0,0 +1,91 @@ +import { existsSync } from 'node:fs'; +import { homedir } from 'node:os'; +import { basename, isAbsolute, join } from 'node:path'; + +export interface ResolvedSkills { + /** Absolute paths to resolved skill directories. Each contains a SKILL.md. */ + paths: string[]; + /** Skill names that couldn't be resolved in any search location. */ + missing: string[]; +} + +/** + * Skill-discovery search order for a named skill. Mirrors the locations + * Claude's SDK and Pi's default resource loader both respect, so Archon + * workflows that already work under Claude find the same skills under any + * provider that calls this resolver. 
+ * + * Order (first match wins per name): + * 1. `/.agents/skills//` — project-local, agentskills.io standard + * 2. `/.claude/skills//` — project-local, Claude convention + * 3. `~/.agents/skills//` — user-global, agentskills.io standard + * 4. `~/.claude/skills//` — user-global, Claude convention + * + * Ancestor traversal above cwd is deliberately not done — matches Pi's + * cwd-bound scope and avoids ambiguity about which repo's skills win when + * Archon runs out of a subdirectory. + */ +function skillSearchRoots(cwd: string): string[] { + // Prefer `HOME` env var when set — Bun's os.homedir() bypasses `HOME` and + // reads from the system uid lookup, which is correct in production but + // makes tests using staged temp homes impossible. + const home = process.env.HOME ?? homedir(); + return [ + join(cwd, '.agents', 'skills'), + join(cwd, '.claude', 'skills'), + join(home, '.agents', 'skills'), + join(home, '.claude', 'skills'), + ]; +} + +/** + * Resolve Archon's name-based `skills:` nodeConfig references to absolute + * directory paths. Each named skill is expected to be a directory containing + * a `SKILL.md` file — the agentskills.io standard layout. + * + * Duplicate names are de-duped; empty/non-string entries are skipped. + * Unresolved names are returned in `missing` for caller-side warning. + */ +export function resolveSkillDirectories( + cwd: string, + skillNames: string[] | undefined +): ResolvedSkills { + if (!skillNames || skillNames.length === 0) { + return { paths: [], missing: [] }; + } + + const roots = skillSearchRoots(cwd); + const paths: string[] = []; + const missing: string[] = []; + const seen = new Set(); + + for (const rawName of skillNames) { + if (typeof rawName !== 'string') continue; + const name = rawName.trim(); + if (name.length === 0) continue; + // Name-only contract: reject path traversal, nested paths, and absolute paths. + if (isAbsolute(name) || basename(name) !== name || name === '.' 
|| name === '..') { + missing.push(rawName); + continue; + } + if (seen.has(name)) continue; + seen.add(name); + + let found: string | undefined; + for (const root of roots) { + const candidate = join(root, name); + if (existsSync(join(candidate, 'SKILL.md'))) { + found = candidate; + break; + } + } + + if (found) { + paths.push(found); + } else { + missing.push(rawName); + } + } + + return { paths, missing }; +} diff --git a/packages/providers/src/shared/structured-output.ts b/packages/providers/src/shared/structured-output.ts new file mode 100644 index 0000000000..8b86d2ec5b --- /dev/null +++ b/packages/providers/src/shared/structured-output.ts @@ -0,0 +1,93 @@ +/** + * Shared best-effort structured-output helpers for providers that have no + * native JSON-mode equivalent to Claude's `outputFormat` or Codex's + * `outputSchema`. The approach is two-step: + * + * 1. Augment the user prompt with a "respond with JSON matching this schema" + * instruction, so instruction-following models emit parseable JSON. + * 2. After the run completes, parse the accumulated assistant transcript. + * + * Models that reliably follow instruction (GPT-5, Claude, Gemini 2.x, recent + * Qwen Coder, DeepSeek V3) return clean JSON; models that don't produce a + * parse failure, which the executor surfaces via the existing + * `dag.structured_output_missing` warning. + */ + +/** + * Append a "respond with JSON matching this schema" instruction to the user + * prompt. Same wording originally authored for Pi — reused verbatim so + * prompt drift across providers is zero. + */ +export function augmentPromptForJsonSchema( + prompt: string, + schema: Record +): string { + return `${prompt} + +--- + +CRITICAL: Respond with ONLY a JSON object matching the schema below. No prose before or after the JSON. No markdown code fences. Just the raw JSON object as your final message. 
+ +Schema: +${JSON.stringify(schema, null, 2)}`; +} + +/** + * Attempt to parse an assistant transcript as the structured-output JSON. + * Handles three common model failure modes: + * - trailing/leading whitespace (always stripped) + * - markdown code fences (```json ... ``` or bare ``` ... ```) that models + * emit despite the "no code fences" instruction in the prompt + * - prose preamble followed by a single trailing JSON object — pattern + * observed on Minimax M2.7 reasoning models that "think out loud" before + * emitting structured output despite explicit JSON-only prompts. + * + * Returns the parsed value on success, `undefined` on any failure. Callers + * treat `undefined` as "structured output unavailable" and degrade via the + * dag-executor's existing missing-structured-output warning. + */ +export function tryParseStructuredOutput(text: string): unknown { + const trimmed = text.trim(); + if (trimmed.length === 0) return undefined; + // Strip ```json / ``` fences if present. Match only at boundaries so we + // don't mangle JSON strings that legitimately contain backticks. + const cleaned = trimmed + .replace(/^```(?:json)?\s*\n?/i, '') + .replace(/\n?\s*```\s*$/, '') + .trim(); + + // Tier 1: clean parse — fast path for fully compliant outputs. + const tier1 = tryJsonParseObject(cleaned); + if (tier1 !== undefined) return tier1; + + // Tier 2: scan forward to the FIRST `{` and parse from there. Recovers the + // preamble-then-JSON pattern reasoning models emit. A backward scan from + // the last `{` was considered but rejected: it silently returns the wrong + // object when the prose contains a brace-bearing example after the real + // payload (e.g. `{"actual":1}\nFor example: {"x":2}` would yield `{x:2}`), + // breaking the conservative-failure contract callers rely on. 
+ const firstBrace = cleaned.indexOf('{'); + if (firstBrace > 0) { + const tier2 = tryJsonParseObject(cleaned.slice(firstBrace)); + if (tier2 !== undefined) return tier2; + } + + return undefined; +} + +/** + * Parse `text` as JSON and only return it if the result is a non-null + * object (or array). Schema augmentation always asks for an object — bare + * `null`, numbers, and strings parse cleanly but are not "structured + * output", so we treat them as missing and let the dag-executor's + * structured_output_missing path engage. + */ +function tryJsonParseObject(text: string): unknown { + try { + const parsed: unknown = JSON.parse(text); + if (parsed === null || typeof parsed !== 'object') return undefined; + return parsed; + } catch { + return undefined; + } +} diff --git a/packages/providers/src/types.ts b/packages/providers/src/types.ts index fe47eff6c4..6d9f285a21 100644 --- a/packages/providers/src/types.ts +++ b/packages/providers/src/types.ts @@ -31,6 +31,49 @@ export interface CodexProviderDefaults { codexBinaryPath?: string; } +/** + * Community provider defaults for GitHub Copilot (@github/copilot-sdk). + */ +export interface CopilotProviderDefaults { + [key: string]: unknown; + /** Default model ref, e.g. 'gpt-5', 'gpt-5-mini', 'claude-sonnet-4.5'. */ + model?: string; + /** + * Reasoning effort passed to the SDK as `reasoningEffort`. Field name + * mirrors `CodexProviderDefaults.modelReasoningEffort` so users get one + * consistent key across cross-provider configs. + */ + modelReasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'; + /** + * Absolute path to the Copilot CLI binary. Required in compiled Archon + * builds when `COPILOT_BIN_PATH` env var is not set. Dev-mode builds let + * the SDK resolve from `$PATH`. + */ + copilotCliPath?: string; + /** + * Override Copilot's config directory. When unset the SDK uses its own + * default (typically `~/.copilot`). 
+ */ + configDir?: string; + /** + * Opt in to Copilot's config discovery from the repo (MCP servers, skills, + * etc. declared in the repo's `.copilot/` directory). Disabled by default + * so arbitrary repos do not implicitly load MCP servers or skills. + * @default false + */ + enableConfigDiscovery?: boolean; + /** + * Reuse the CLI's logged-in user credentials (from `copilot login`) when + * no explicit token is provided via env vars. Defaults to true. + * @default true + */ + useLoggedInUser?: boolean; + /** + * Copilot CLI log level. When unset the SDK picks its own default. + */ + logLevel?: 'none' | 'error' | 'warning' | 'info' | 'debug' | 'all'; +} + /** * Community provider defaults for Pi (@mariozechner/pi-coding-agent). * v1 minimal shape; extend as capabilities are wired in.