diff --git a/.claude/rules/cli.md b/.claude/rules/cli.md index 11a1d68d81..a954b6bd18 100644 --- a/.claude/rules/cli.md +++ b/.claude/rules/cli.md @@ -29,9 +29,10 @@ bun run cli version ## Startup Behavior -1. Loads `~/.archon/.env` with `override: true` (Archon's config wins over any Bun-auto-loaded CWD vars) -2. Smart Claude auth default: if no `CLAUDE_API_KEY` or `CLAUDE_CODE_OAUTH_TOKEN`, sets `CLAUDE_USE_GLOBAL_AUTH=true` -3. Imports all commands AFTER dotenv setup +1. `@archon/paths/strip-cwd-env-boot` (first import) removes all Bun-auto-loaded CWD `.env` keys from `process.env` +2. Loads `~/.archon/.env` with `override: true` (Archon config wins over shell-inherited vars) +3. Smart Claude auth default: if no `CLAUDE_API_KEY` or `CLAUDE_CODE_OAUTH_TOKEN`, sets `CLAUDE_USE_GLOBAL_AUTH=true` +4. Imports all commands AFTER dotenv setup ## WorkflowRunOptions Interface diff --git a/CLAUDE.md b/CLAUDE.md index f38cb29a98..0e902537dd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -399,7 +399,7 @@ import type { DagNode, WorkflowDefinition } from '@/lib/api'; ### Architecture Layers **Package Split:** -- **@archon/paths**: Path resolution utilities, Pino logger factory, web dist cache path (`getWebDistDir`) (no @archon/* deps) +- **@archon/paths**: Path resolution utilities, Pino logger factory, web dist cache path (`getWebDistDir`), CWD env stripper (`stripCwdEnv`, `strip-cwd-env-boot`) (no @archon/* deps; `pino` and `dotenv` are allowed external deps) - **@archon/git**: Git operations - worktrees, branches, repos, exec wrappers (depends only on @archon/paths) - **@archon/isolation**: Worktree isolation types, providers, resolver, error classifiers (depends only on @archon/git + @archon/paths) - **@archon/workflows**: Workflow engine - loader, router, executor, DAG, logger, bundled defaults (depends only on @archon/git + @archon/paths + @hono/zod-openapi + zod; DB/AI/config injected via `WorkflowDeps`) diff --git a/bun.lock b/bun.lock index 43f419a191..04517f4fbf 100644 
--- a/bun.lock +++ b/bun.lock @@ -23,7 +23,7 @@ }, "packages/adapters": { "name": "@archon/adapters", - "version": "0.1.0", + "version": "0.3.5", "dependencies": { "@archon/core": "workspace:*", "@archon/git": "workspace:*", @@ -41,7 +41,7 @@ }, "packages/cli": { "name": "@archon/cli", - "version": "0.2.13", + "version": "0.3.5", "bin": { "archon": "./src/cli.ts", }, @@ -62,7 +62,7 @@ }, "packages/core": { "name": "@archon/core", - "version": "0.2.0", + "version": "0.3.5", "dependencies": { "@anthropic-ai/claude-agent-sdk": "^0.2.89", "@archon/git": "workspace:*", @@ -83,7 +83,7 @@ }, "packages/docs-web": { "name": "@archon/docs-web", - "version": "0.2.12", + "version": "0.3.5", "dependencies": { "@astrojs/starlight": "^0.38.0", "astro": "^6.1.0", @@ -92,7 +92,7 @@ }, "packages/git": { "name": "@archon/git", - "version": "0.1.0", + "version": "0.3.5", "dependencies": { "@archon/paths": "workspace:*", }, @@ -102,7 +102,7 @@ }, "packages/isolation": { "name": "@archon/isolation", - "version": "0.1.0", + "version": "0.3.5", "dependencies": { "@archon/git": "workspace:*", "@archon/paths": "workspace:*", @@ -113,8 +113,9 @@ }, "packages/paths": { "name": "@archon/paths", - "version": "0.2.0", + "version": "0.3.5", "dependencies": { + "dotenv": "^17", "pino": "^9", "pino-pretty": "^13", }, @@ -124,7 +125,7 @@ }, "packages/server": { "name": "@archon/server", - "version": "0.2.0", + "version": "0.3.5", "dependencies": { "@archon/adapters": "workspace:*", "@archon/core": "workspace:*", @@ -142,7 +143,7 @@ }, "packages/web": { "name": "@archon/web", - "version": "0.2.0", + "version": "0.3.5", "dependencies": { "@dagrejs/dagre": "^2.0.4", "@radix-ui/react-alert-dialog": "^1.1.15", @@ -194,7 +195,7 @@ }, "packages/workflows": { "name": "@archon/workflows", - "version": "0.1.0", + "version": "0.3.5", "dependencies": { "@archon/git": "workspace:*", "@archon/paths": "workspace:*", diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 96c0209666..d7dedf4810 100755 
--- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -7,18 +7,18 @@ * archon workflow run [msg] Run a workflow * archon version Show version info */ +// Must be the very first import — strips Bun-auto-loaded CWD .env keys before +// any module reads process.env at init time (e.g. @archon/paths/logger reads LOG_LEVEL). +import '@archon/paths/strip-cwd-env-boot'; import { parseArgs } from 'util'; import { config } from 'dotenv'; import { resolve } from 'path'; import { existsSync } from 'fs'; -// Load .env from global Archon config (override: true so ~/.archon/.env -// always wins over any Bun-auto-loaded CWD vars). -// -// Credential safety: target repo .env keys that Bun auto-loads from CWD -// cannot leak into AI subprocesses — SUBPROCESS_ENV_ALLOWLIST blocks them. -// The env-leak gate provides a second layer by scanning target repos before -// spawning. No CWD stripping needed. +// Load ~/.archon/.env with override: true — Archon-specific config must win +// over shell-inherited env vars (e.g. PORT, LOG_LEVEL from shell profile). +// CWD .env keys are already gone (stripCwdEnv above), so override only +// affects shell-inherited values, which is the intended behavior. const globalEnvPath = resolve(process.env.HOME ?? '~', '.archon', '.env'); if (existsSync(globalEnvPath)) { const result = config({ path: globalEnvPath, override: true }); @@ -30,6 +30,9 @@ if (existsSync(globalEnvPath)) { } } +// CLAUDECODE=1 warning is emitted inside stripCwdEnv() (boot import above) +// BEFORE the marker is deleted from process.env. No duplicate warning here. 
+ // Smart defaults for Claude auth // If no explicit tokens, default to global auth from `claude /login` if (!process.env.CLAUDE_API_KEY && !process.env.CLAUDE_CODE_OAUTH_TOKEN) { diff --git a/packages/cli/src/commands/serve.ts b/packages/cli/src/commands/serve.ts index e24a5526a3..22e8ad30e7 100644 --- a/packages/cli/src/commands/serve.ts +++ b/packages/cli/src/commands/serve.ts @@ -60,7 +60,6 @@ export async function serveCommand(opts: ServeOptions): Promise { await startServer({ webDistPath: webDistDir, port: opts.port, - skipPlatformAdapters: true, }); } catch (err) { const error = toError(err); diff --git a/packages/core/package.json b/packages/core/package.json index d0d93635b6..cbad1463a3 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -23,7 +23,7 @@ "./state/*": "./src/state/*.ts" }, "scripts": { - "test": "bun test src/clients/codex-binary-guard.test.ts && bun test src/utils/codex-binary-resolver.test.ts && bun test src/utils/codex-binary-resolver-dev.test.ts && bun test src/clients/claude.test.ts src/clients/codex.test.ts src/clients/factory.test.ts && bun test src/handlers/command-handler.test.ts && bun test src/handlers/clone.test.ts && bun test src/db/adapters/postgres.test.ts && bun test src/db/adapters/sqlite.test.ts src/db/codebases.test.ts src/db/connection.test.ts src/db/conversations.test.ts src/db/env-vars.test.ts src/db/isolation-environments.test.ts src/db/messages.test.ts src/db/sessions.test.ts src/db/workflow-events.test.ts src/db/workflows.test.ts src/utils/defaults-copy.test.ts src/utils/worktree-sync.test.ts src/utils/conversation-lock.test.ts src/utils/credential-sanitizer.test.ts src/utils/port-allocation.test.ts src/utils/error.test.ts src/utils/error-formatter.test.ts src/utils/github-graphql.test.ts src/utils/env-allowlist.test.ts src/utils/env-leak-scanner.test.ts src/config/ src/state/ && bun test src/utils/path-validation.test.ts && bun test src/services/cleanup-service.test.ts && bun test 
src/services/title-generator.test.ts && bun test src/workflows/ && bun test src/operations/workflow-operations.test.ts && bun test src/operations/isolation-operations.test.ts && bun test src/orchestrator/orchestrator.test.ts && bun test src/orchestrator/orchestrator-agent.test.ts && bun test src/orchestrator/orchestrator-isolation.test.ts", + "test": "bun test src/clients/codex-binary-guard.test.ts && bun test src/utils/codex-binary-resolver.test.ts && bun test src/utils/codex-binary-resolver-dev.test.ts && bun test src/clients/claude.test.ts src/clients/codex.test.ts src/clients/factory.test.ts && bun test src/handlers/command-handler.test.ts && bun test src/handlers/clone.test.ts && bun test src/db/adapters/postgres.test.ts && bun test src/db/adapters/sqlite.test.ts src/db/codebases.test.ts src/db/connection.test.ts src/db/conversations.test.ts src/db/env-vars.test.ts src/db/isolation-environments.test.ts src/db/messages.test.ts src/db/sessions.test.ts src/db/workflow-events.test.ts src/db/workflows.test.ts src/utils/defaults-copy.test.ts src/utils/worktree-sync.test.ts src/utils/conversation-lock.test.ts src/utils/credential-sanitizer.test.ts src/utils/port-allocation.test.ts src/utils/error.test.ts src/utils/error-formatter.test.ts src/utils/github-graphql.test.ts src/utils/env-leak-scanner.test.ts src/config/ src/state/ && bun test src/utils/path-validation.test.ts && bun test src/services/cleanup-service.test.ts && bun test src/services/title-generator.test.ts && bun test src/workflows/ && bun test src/operations/workflow-operations.test.ts && bun test src/operations/isolation-operations.test.ts && bun test src/orchestrator/orchestrator.test.ts && bun test src/orchestrator/orchestrator-agent.test.ts && bun test src/orchestrator/orchestrator-isolation.test.ts", "type-check": "bun x tsc --noEmit", "build": "echo 'No build needed - Bun runs TypeScript directly'" }, diff --git a/packages/core/src/clients/claude.test.ts b/packages/core/src/clients/claude.test.ts 
index fd79d16280..e09c004822 100644 --- a/packages/core/src/clients/claude.test.ts +++ b/packages/core/src/clients/claude.test.ts @@ -446,9 +446,12 @@ describe('ClaudeClient', () => { ); }); - test('strips NODE_OPTIONS from subprocess env', async () => { - const original = process.env.NODE_OPTIONS; - process.env.NODE_OPTIONS = '--inspect'; + test('subprocess env passes through all process.env keys (no allowlist filtering)', async () => { + // With the allowlist removed, buildSubprocessEnv returns { ...process.env }. + // CWD .env leakage and CLAUDECODE markers are handled at entry point by + // stripCwdEnv(), not by buildSubprocessEnv(). See #1067, #1097. + const originalKey = process.env.CUSTOM_USER_KEY; + process.env.CUSTOM_USER_KEY = 'user-trusted-value'; mockQuery.mockImplementation(async function* () { // Empty generator @@ -460,113 +463,13 @@ describe('ClaudeClient', () => { } const callArgs = mockQuery.mock.calls[0][0] as { options: { env: NodeJS.ProcessEnv } }; - expect(callArgs.options.env.NODE_OPTIONS).toBeUndefined(); + expect(callArgs.options.env.CUSTOM_USER_KEY).toBe('user-trusted-value'); + expect(callArgs.options.env.PATH).toBe(process.env.PATH); + expect(callArgs.options.env.HOME).toBe(process.env.HOME); // Cleanup - if (original !== undefined) { - process.env.NODE_OPTIONS = original; - } else { - delete process.env.NODE_OPTIONS; - } - }); - - test('ANTHROPIC_API_KEY alone does not set hasExplicitTokens (falls through to global auth)', async () => { - const originalOauth = process.env.CLAUDE_CODE_OAUTH_TOKEN; - const originalApiKey = process.env.CLAUDE_API_KEY; - const originalAnthropicKey = process.env.ANTHROPIC_API_KEY; - - delete process.env.CLAUDE_CODE_OAUTH_TOKEN; - delete process.env.CLAUDE_API_KEY; - process.env.ANTHROPIC_API_KEY = 'sk-ant-test-key'; - - mockQuery.mockImplementation(async function* () { - // Empty generator - }); - - // eslint-disable-next-line @typescript-eslint/no-unused-vars - for await (const _ of client.sendQuery('test', 
'/workspace')) { - // consume - } - - // ANTHROPIC_API_KEY must NOT reach the subprocess: it is not in the - // SUBPROCESS_ENV_ALLOWLIST, so a leaked target-repo key cannot bill - // the wrong account. See issue #1029. - const callArgs = mockQuery.mock.calls[0][0] as { options: { env: NodeJS.ProcessEnv } }; - expect(callArgs.options.env.ANTHROPIC_API_KEY).toBeUndefined(); - // Explicit SDK vars are absent (useGlobalAuth=true path) - expect(callArgs.options.env.CLAUDE_API_KEY).toBeUndefined(); - expect(callArgs.options.env.CLAUDE_CODE_OAUTH_TOKEN).toBeUndefined(); - - // Cleanup - if (originalOauth !== undefined) process.env.CLAUDE_CODE_OAUTH_TOKEN = originalOauth; - else delete process.env.CLAUDE_CODE_OAUTH_TOKEN; - if (originalApiKey !== undefined) process.env.CLAUDE_API_KEY = originalApiKey; - else delete process.env.CLAUDE_API_KEY; - if (originalAnthropicKey !== undefined) process.env.ANTHROPIC_API_KEY = originalAnthropicKey; - else delete process.env.ANTHROPIC_API_KEY; - }); - - test('ANTHROPIC_API_KEY excluded from subprocess env when using explicit auth (useGlobalAuth=false)', async () => { - const originalOauth = process.env.CLAUDE_CODE_OAUTH_TOKEN; - const originalApiKey = process.env.CLAUDE_API_KEY; - const originalAnthropicKey = process.env.ANTHROPIC_API_KEY; - const originalGlobalAuth = process.env.CLAUDE_USE_GLOBAL_AUTH; - - // Force explicit auth path regardless of env - process.env.CLAUDE_USE_GLOBAL_AUTH = 'false'; - process.env.CLAUDE_API_KEY = 'sk-ant-explicit-key'; - process.env.ANTHROPIC_API_KEY = 'sk-ant-target-repo-key'; - delete process.env.CLAUDE_CODE_OAUTH_TOKEN; - - mockQuery.mockImplementation(async function* () { - // Empty generator - }); - - // eslint-disable-next-line @typescript-eslint/no-unused-vars - for await (const _ of client.sendQuery('test', '/workspace')) { - // consume - } - - // ANTHROPIC_API_KEY must NOT reach the subprocess regardless of which auth - // path is taken — the allowlist excludes it in both cases. 
See issue #1029. - const callArgs = mockQuery.mock.calls[0][0] as { options: { env: NodeJS.ProcessEnv } }; - expect(callArgs.options.env.ANTHROPIC_API_KEY).toBeUndefined(); - // Explicit auth vars are present on the useGlobalAuth=false path - expect(callArgs.options.env.CLAUDE_API_KEY).toBeDefined(); - - // Cleanup - if (originalOauth !== undefined) process.env.CLAUDE_CODE_OAUTH_TOKEN = originalOauth; - else delete process.env.CLAUDE_CODE_OAUTH_TOKEN; - if (originalApiKey !== undefined) process.env.CLAUDE_API_KEY = originalApiKey; - else delete process.env.CLAUDE_API_KEY; - if (originalAnthropicKey !== undefined) process.env.ANTHROPIC_API_KEY = originalAnthropicKey; - else delete process.env.ANTHROPIC_API_KEY; - if (originalGlobalAuth !== undefined) process.env.CLAUDE_USE_GLOBAL_AUTH = originalGlobalAuth; - else delete process.env.CLAUDE_USE_GLOBAL_AUTH; - }); - - test('strips VSCODE_INSPECTOR_OPTIONS from subprocess env', async () => { - const original = process.env.VSCODE_INSPECTOR_OPTIONS; - process.env.VSCODE_INSPECTOR_OPTIONS = 'some-value'; - - mockQuery.mockImplementation(async function* () { - // Empty generator - }); - - // eslint-disable-next-line @typescript-eslint/no-unused-vars - for await (const _ of client.sendQuery('test', '/workspace')) { - // consume - } - - const callArgs = mockQuery.mock.calls[0][0] as { options: { env: NodeJS.ProcessEnv } }; - expect(callArgs.options.env.VSCODE_INSPECTOR_OPTIONS).toBeUndefined(); - - // Cleanup - if (original !== undefined) { - process.env.VSCODE_INSPECTOR_OPTIONS = original; - } else { - delete process.env.VSCODE_INSPECTOR_OPTIONS; - } + if (originalKey !== undefined) process.env.CUSTOM_USER_KEY = originalKey; + else delete process.env.CUSTOM_USER_KEY; }); test('classifies exit code errors as crash and retries up to 3 times', async () => { @@ -1106,3 +1009,89 @@ describe('ClaudeClient', () => { }); }); }); + +describe('withFirstMessageTimeout', () => { + const { withFirstMessageTimeout } = claudeModule; + + 
test('completes normally when first event arrives before timeout', async () => { + async function* fastGen(): AsyncGenerator { + yield 'hello'; + yield 'world'; + } + const controller = new AbortController(); + const gen = withFirstMessageTimeout(fastGen(), controller, 50, {}); + const first = await gen.next(); + expect(first.value).toBe('hello'); + const second = await gen.next(); + expect(second.value).toBe('world'); + }); + + test('throws after timeout when generator never yields', async () => { + async function* stuckGen(): AsyncGenerator { + await new Promise(() => {}); + yield 'never'; + } + const controller = new AbortController(); + const gen = withFirstMessageTimeout(stuckGen(), controller, 50, {}); + await expect(gen.next()).rejects.toThrow('produced no output within 50ms'); + }); + + test('timeout error mentions issue #1067 for discoverability', async () => { + async function* stuckGen(): AsyncGenerator { + await new Promise(() => {}); + yield 'never'; + } + const controller = new AbortController(); + const gen = withFirstMessageTimeout(stuckGen(), controller, 50, {}); + await expect(gen.next()).rejects.toThrow('1067'); + }); + + test('aborts the controller when timeout fires', async () => { + async function* stuckGen(): AsyncGenerator { + await new Promise(() => {}); + yield 'never'; + } + const controller = new AbortController(); + const gen = withFirstMessageTimeout(stuckGen(), controller, 50, {}); + await expect(gen.next()).rejects.toThrow(); + expect(controller.signal.aborted).toBe(true); + }); + + test('handles generator that completes immediately without yielding', async () => { + async function* emptyGen(): AsyncGenerator { + return; + } + const controller = new AbortController(); + const gen = withFirstMessageTimeout(emptyGen(), controller, 50, {}); + const result = await gen.next(); + expect(result.done).toBe(true); + }); + + test('logs diagnostic payload with env keys and process state on timeout', async () => { + async function* stuckGen(): 
AsyncGenerator { + await new Promise(() => {}); + yield 'never'; + } + const controller = new AbortController(); + const diagnostics = { + subprocessEnvKeys: ['PATH', 'HOME', 'CLAUDE_API_KEY'], + parentClaudeKeys: ['CLAUDECODE', 'CLAUDE_CODE_ENTRYPOINT'], + model: 'sonnet', + platform: 'darwin', + }; + const gen = withFirstMessageTimeout(stuckGen(), controller, 50, diagnostics); + await expect(gen.next()).rejects.toThrow(); + + // Verify the diagnostic dump was logged at error level + expect(mockLogger.error).toHaveBeenCalledWith( + expect.objectContaining({ + subprocessEnvKeys: ['PATH', 'HOME', 'CLAUDE_API_KEY'], + parentClaudeKeys: ['CLAUDECODE', 'CLAUDE_CODE_ENTRYPOINT'], + model: 'sonnet', + platform: 'darwin', + timeoutMs: 50, + }), + 'claude.first_event_timeout' + ); + }); +}); diff --git a/packages/core/src/clients/claude.ts b/packages/core/src/clients/claude.ts index 1d2bd664b3..90595e1d25 100644 --- a/packages/core/src/clients/claude.ts +++ b/packages/core/src/clients/claude.ts @@ -35,7 +35,10 @@ import { type TokenUsage, } from '../types'; import { createLogger } from '@archon/paths'; -import { buildCleanSubprocessEnv } from '../utils/env-allowlist'; +// No env filtering here — process.env is already clean: +// stripCwdEnv() at entry point stripped CWD .env keys + CLAUDECODE markers, +// then ~/.archon/.env was loaded as the trusted source. All keys the user sets +// in ~/.archon/.env are intentional and pass through to the subprocess. import { scanPathForSensitiveKeys, EnvLeakError } from '../utils/env-leak-scanner'; import * as codebaseDb from '../db/codebases'; import { loadConfig } from '../config/config-loader'; @@ -79,111 +82,31 @@ function normalizeClaudeUsage(usage?: { } /** - * Build environment for Claude subprocess + * Build environment for Claude subprocess. 
* - * Auth behavior: - * - CLAUDE_USE_GLOBAL_AUTH=true: Filter tokens, use global auth from `claude /login` - * - CLAUDE_USE_GLOBAL_AUTH=false: Pass tokens through explicitly - * - Not set: Auto-detect — use explicit tokens if present, otherwise fall back to global auth + * process.env is already clean at this point: + * - stripCwdEnv() at entry point removed CWD .env keys + CLAUDECODE markers + * - ~/.archon/.env loaded with override:true as the trusted source + * + * Auth mode is determined by the SDK based on what tokens are present: + * - Tokens in env → SDK uses them (explicit auth) + * - No tokens → SDK uses `claude /login` credentials (global auth) + * - User controls this by what they put in ~/.archon/.env + * + * We log the detected mode for diagnostics but don't filter — the user's + * config is trusted. See coleam00/Archon#1067 for design rationale. */ function buildSubprocessEnv(): NodeJS.ProcessEnv { - const globalAuthSetting = process.env.CLAUDE_USE_GLOBAL_AUTH?.toLowerCase(); - - // Check for empty token values (common misconfiguration) - const tokenVars = ['CLAUDE_CODE_OAUTH_TOKEN', 'CLAUDE_API_KEY'] as const; - const emptyTokens = tokenVars.filter(v => process.env[v] === ''); - if (emptyTokens.length > 0) { - getLog().warn({ emptyTokens }, 'empty_token_values'); - } - - // Warn if user has the legacy variable but not the new ones - if ( - process.env.ANTHROPIC_API_KEY && - !process.env.CLAUDE_CODE_OAUTH_TOKEN && - !process.env.CLAUDE_API_KEY - ) { - getLog().warn( - { hint: 'Use CLAUDE_API_KEY or CLAUDE_CODE_OAUTH_TOKEN instead' }, - 'deprecated_anthropic_api_key_ignored' - ); - } - const hasExplicitTokens = Boolean( process.env.CLAUDE_CODE_OAUTH_TOKEN ?? process.env.CLAUDE_API_KEY ); + const authMode = hasExplicitTokens ? 'explicit' : 'global'; + getLog().info( + { authMode }, + authMode === 'global' ? 
'using_global_auth' : 'using_explicit_tokens' + ); - // Determine whether to use global auth - let useGlobalAuth: boolean; - if (globalAuthSetting === 'true') { - useGlobalAuth = true; - getLog().info({ authMode: 'global' }, 'using_global_auth'); - } else if (globalAuthSetting === 'false') { - useGlobalAuth = false; - getLog().info({ authMode: 'explicit' }, 'using_explicit_tokens'); - } else if (globalAuthSetting !== undefined) { - // Unrecognized value - warn and fall back to auto-detect - getLog().warn({ value: globalAuthSetting }, 'unrecognized_global_auth_setting'); - useGlobalAuth = !hasExplicitTokens; - } else { - // Not set - auto-detect: use tokens if present, otherwise global auth - useGlobalAuth = !hasExplicitTokens; - if (hasExplicitTokens) { - getLog().info({ authMode: 'explicit', autoDetected: true }, 'using_explicit_tokens'); - } else { - getLog().info({ authMode: 'global', autoDetected: true }, 'using_global_auth'); - } - } - - let baseEnv: NodeJS.ProcessEnv; - - if (useGlobalAuth) { - // Start from allowlist-filtered env, then strip auth tokens - const clean = buildCleanSubprocessEnv(); - const { CLAUDE_CODE_OAUTH_TOKEN, CLAUDE_API_KEY, ...envWithoutAuth } = clean; - - // Log if we're filtering out tokens (helps debug auth issues) - const filtered = [ - CLAUDE_CODE_OAUTH_TOKEN && 'CLAUDE_CODE_OAUTH_TOKEN', - CLAUDE_API_KEY && 'CLAUDE_API_KEY', - ].filter(Boolean); - - if (filtered.length > 0) { - getLog().info({ filteredVars: filtered }, 'global_auth_filtered_tokens'); - } - - baseEnv = envWithoutAuth; - } else { - // Start from allowlist-filtered env (includes auth tokens) - baseEnv = buildCleanSubprocessEnv(); - } - - // Clean env vars that interfere with Claude Code subprocess - const cleanedVars: string[] = []; - - // Strip nested-session guard marker (claude-code v2.1.41+). - // When the server is started from inside a Claude Code terminal, CLAUDECODE=1 - // is inherited and causes the subprocess to refuse to launch. 
- // See: https://github.com/anthropics/claude-code/issues/25434 - if (baseEnv.CLAUDECODE) { - delete baseEnv.CLAUDECODE; - cleanedVars.push('CLAUDECODE'); - } - - // Strip debugger env vars - // See: https://github.com/anthropics/claude-code/issues/4619 - if (baseEnv.NODE_OPTIONS) { - delete baseEnv.NODE_OPTIONS; - cleanedVars.push('NODE_OPTIONS'); - } - if (baseEnv.VSCODE_INSPECTOR_OPTIONS) { - delete baseEnv.VSCODE_INSPECTOR_OPTIONS; - cleanedVars.push('VSCODE_INSPECTOR_OPTIONS'); - } - if (cleanedVars.length > 0) { - getLog().info({ cleanedVars }, 'subprocess_env_cleaned'); - } - - return baseEnv; + return { ...process.env }; } /** Max retries for transient subprocess failures (3 = 4 total attempts). @@ -230,6 +153,88 @@ function classifySubprocessError( return 'unknown'; } +/** Default timeout for first SDK message (ms). Configurable via env var. */ +function getFirstEventTimeoutMs(): number { + const raw = process.env.ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS; + if (raw) { + const parsed = Number(raw); + if (Number.isFinite(parsed) && parsed > 0) return parsed; + } + return 60_000; +} + +/** Build a diagnostic payload for claude.first_event_timeout log */ +function buildFirstEventHangDiagnostics( + subprocessEnv: Record, + model: string | undefined +): Record { + return { + subprocessEnvKeys: Object.keys(subprocessEnv), + parentClaudeKeys: Object.keys(process.env).filter( + k => k === 'CLAUDECODE' || k.startsWith('CLAUDE_CODE_') || k.startsWith('ANTHROPIC_') + ), + model, + platform: process.platform, + uid: getProcessUid(), + isTTY: process.stdout.isTTY ?? false, + claudeCode: process.env.CLAUDECODE, + claudeCodeEntrypoint: process.env.CLAUDE_CODE_ENTRYPOINT, + }; +} + +/** Sentinel error class to identify timeout rejections in withFirstMessageTimeout. */ +class FirstEventTimeoutError extends Error {} + +/** + * Wraps an async generator so that the first call to .next() must resolve + * within `timeoutMs`. 
If it doesn't, aborts the controller and throws a + * descriptive error. Subsequent .next() calls are forwarded directly. + * + * Uses Promise.race() — not just AbortController — because the pathological + * case is "SDK ignores abort", so we need an independent unblocking mechanism. + */ +export async function* withFirstMessageTimeout( + gen: AsyncGenerator, + controller: AbortController, + timeoutMs: number, + diagnostics: Record +): AsyncGenerator { + // Race first event against timeout + let timerId: ReturnType | undefined; + let firstValue: IteratorResult; + try { + firstValue = await Promise.race([ + gen.next(), + new Promise((_, reject) => { + timerId = setTimeout(() => { + reject(new FirstEventTimeoutError()); + }, timeoutMs); + }), + ]); + } catch (err) { + if (err instanceof FirstEventTimeoutError) { + controller.abort(); + getLog().error({ ...diagnostics, timeoutMs }, 'claude.first_event_timeout'); + throw new Error( + 'Claude Code subprocess produced no output within ' + + timeoutMs + + 'ms. ' + + 'See logs for claude.first_event_timeout diagnostic dump. ' + + 'Details: https://github.com/coleam00/Archon/issues/1067' + ); + } + throw err; + } finally { + clearTimeout(timerId); + } + + if (firstValue.done) return; + yield firstValue.value; + + // Forward remaining events directly + yield* gen; +} + /** * Returns the current process UID, or undefined on platforms that don't support it (e.g. Windows). * Exported for testing — spyOn(claudeModule, 'getProcessUid') works cross-platform. 
@@ -479,7 +484,14 @@ export class ClaudeClient implements IAssistantClient { } try { - for await (const msg of query({ prompt, options })) { + const rawEvents = query({ prompt, options }); + const timeoutMs = getFirstEventTimeoutMs(); + const diagnostics = buildFirstEventHangDiagnostics( + options.env as Record, + options.model + ); + const events = withFirstMessageTimeout(rawEvents, controller, timeoutMs, diagnostics); + for await (const msg of events) { // Drain tool results captured by PostToolUse hook before processing the next message while (toolResultQueue.length > 0) { const tr = toolResultQueue.shift(); diff --git a/packages/core/src/utils/env-allowlist.test.ts b/packages/core/src/utils/env-allowlist.test.ts deleted file mode 100644 index e5a51d78e0..0000000000 --- a/packages/core/src/utils/env-allowlist.test.ts +++ /dev/null @@ -1,70 +0,0 @@ -import { describe, it, expect, beforeEach, afterEach } from 'bun:test'; -import { buildCleanSubprocessEnv, SUBPROCESS_ENV_ALLOWLIST } from './env-allowlist'; - -describe('buildCleanSubprocessEnv', () => { - let originalEnv: NodeJS.ProcessEnv; - - beforeEach(() => { - originalEnv = { ...process.env }; - }); - - afterEach(() => { - for (const key of Object.keys(process.env)) { - if (!(key in originalEnv)) delete process.env[key]; - } - Object.assign(process.env, originalEnv); - }); - - it('includes allowlisted vars present in process.env', () => { - process.env.CLAUDE_USE_GLOBAL_AUTH = 'true'; - const env = buildCleanSubprocessEnv(); - expect(env.CLAUDE_USE_GLOBAL_AUTH).toBe('true'); - }); - - it('excludes ANTHROPIC_API_KEY (not in allowlist)', () => { - process.env.ANTHROPIC_API_KEY = 'leaked-key-from-target-repo'; - const env = buildCleanSubprocessEnv(); - expect(env.ANTHROPIC_API_KEY).toBeUndefined(); - }); - - it('excludes arbitrary target-repo vars', () => { - process.env.MY_APP_SECRET = 'should-not-leak'; - process.env.POSTGRES_PASSWORD = 'db-secret'; - const env = buildCleanSubprocessEnv(); - 
expect(env.MY_APP_SECRET).toBeUndefined(); - expect(env.POSTGRES_PASSWORD).toBeUndefined(); - }); - - it('includes PATH and HOME (system essentials)', () => { - const env = buildCleanSubprocessEnv(); - expect(env.PATH).toBe(process.env.PATH); - expect(env.HOME).toBe(process.env.HOME); - }); - - it('includes GITHUB_TOKEN when present', () => { - process.env.GITHUB_TOKEN = 'ghp_test123'; - const env = buildCleanSubprocessEnv(); - expect(env.GITHUB_TOKEN).toBe('ghp_test123'); - }); - - it('does not include keys with undefined values', () => { - const env = buildCleanSubprocessEnv(); - for (const value of Object.values(env)) { - expect(value).not.toBeUndefined(); - } - }); -}); - -describe('SUBPROCESS_ENV_ALLOWLIST', () => { - it('does not contain ANTHROPIC_API_KEY', () => { - expect(SUBPROCESS_ENV_ALLOWLIST.has('ANTHROPIC_API_KEY')).toBe(false); - }); - - it('does not contain DATABASE_URL', () => { - expect(SUBPROCESS_ENV_ALLOWLIST.has('DATABASE_URL')).toBe(false); - }); - - it('contains CLAUDE_API_KEY', () => { - expect(SUBPROCESS_ENV_ALLOWLIST.has('CLAUDE_API_KEY')).toBe(true); - }); -}); diff --git a/packages/core/src/utils/env-allowlist.ts b/packages/core/src/utils/env-allowlist.ts deleted file mode 100644 index d17f30ac55..0000000000 --- a/packages/core/src/utils/env-allowlist.ts +++ /dev/null @@ -1,70 +0,0 @@ -/** - * Subprocess environment allowlist - * - * Controls which process.env keys are passed to Claude Code subprocesses. - * Using an allowlist prevents target-repo .env leakage (Bun auto-loads CWD .env). - * Per-codebase env vars (codebase_env_vars table / .archon/config.yaml `env:`) are - * merged on top by the workflow executor via requestOptions.env — those are unaffected. 
- */ - -/** Canonical set of env vars Claude Code subprocess legitimately needs */ -export const SUBPROCESS_ENV_ALLOWLIST = new Set([ - // System essentials needed by tools, git, shell operations - 'PATH', - 'HOME', - 'USER', - 'LOGNAME', - 'SHELL', - 'TERM', - 'TMPDIR', - 'TEMP', - 'TMP', - 'LANG', - 'LC_ALL', - 'LC_CTYPE', - 'TZ', - 'SSH_AUTH_SOCK', - // Claude auth and config - 'CLAUDE_USE_GLOBAL_AUTH', - 'CLAUDE_API_KEY', - 'CLAUDE_CODE_OAUTH_TOKEN', - 'CLAUDE_CODE_USE_BEDROCK', - 'CLAUDE_CODE_USE_VERTEX', - 'ANTHROPIC_BASE_URL', - 'ANTHROPIC_BEDROCK_BASE_URL', - 'ANTHROPIC_VERTEX_PROJECT_ID', - 'ANTHROPIC_VERTEX_REGION', - // Archon runtime config - 'ARCHON_HOME', - 'ARCHON_DOCKER', - 'IS_SANDBOX', - 'WORKSPACE_PATH', - 'LOG_LEVEL', - // Git identity (used by git commits inside workflows) - 'GIT_AUTHOR_NAME', - 'GIT_AUTHOR_EMAIL', - 'GIT_COMMITTER_NAME', - 'GIT_COMMITTER_EMAIL', - 'GIT_SSH_COMMAND', - // GitHub CLI (used by Claude Code tools) - 'GITHUB_TOKEN', - 'GH_TOKEN', -]); - -/** - * Build a clean subprocess env from process.env using the allowlist. - * Call this instead of spreading process.env directly. - * - * The caller (buildSubprocessEnv in claude.ts) then applies auth filtering - * on top (strip CLAUDE_CODE_OAUTH_TOKEN/CLAUDE_API_KEY when using global auth). - * Per-query env overrides (requestOptions.env) are merged last by the caller. - */ -export function buildCleanSubprocessEnv(): NodeJS.ProcessEnv { - const clean: NodeJS.ProcessEnv = {}; - for (const key of SUBPROCESS_ENV_ALLOWLIST) { - if (process.env[key] !== undefined) { - clean[key] = process.env[key]; - } - } - return clean; -} diff --git a/packages/docs-web/src/content/docs/reference/cli.md b/packages/docs-web/src/content/docs/reference/cli.md index f2821a1b8b..33f6436884 100644 --- a/packages/docs-web/src/content/docs/reference/cli.md +++ b/packages/docs-web/src/content/docs/reference/cli.md @@ -362,11 +362,12 @@ When using `--branch`, workflows run inside the worktree directory. 
## Environment -The CLI loads `~/.archon/.env` with `override: true`, so Archon's own config always wins over any env vars Bun auto-loads from the current working directory. Target repo env vars remain in `process.env` but cannot reach AI subprocesses — `SUBPROCESS_ENV_ALLOWLIST` blocks all non-whitelisted keys. +At startup, the CLI strips all Bun-auto-loaded CWD `.env` keys and nested Claude Code session markers from `process.env`, then loads `~/.archon/.env` as the sole trusted source. All keys you set in `~/.archon/.env` pass through to AI subprocesses — no allowlist filtering. On startup, the CLI: -1. Loads `~/.archon/.env` with `override: true` (Archon's config wins over CWD vars) -2. Auto-enables global Claude auth if no explicit tokens are set +1. Strips CWD `.env` keys + `CLAUDECODE` markers from `process.env` (via `stripCwdEnv`) +2. Loads `~/.archon/.env` (all keys trusted) +3. Auto-enables global Claude auth if no explicit tokens are set ## Database diff --git a/packages/docs-web/src/content/docs/reference/configuration.md b/packages/docs-web/src/content/docs/reference/configuration.md index a1024c530c..c126b968f1 100644 --- a/packages/docs-web/src/content/docs/reference/configuration.md +++ b/packages/docs-web/src/content/docs/reference/configuration.md @@ -190,6 +190,7 @@ Environment variables override all other configuration. They are organized by ca | `DEFAULT_AI_ASSISTANT` | Default AI assistant (`claude` or `codex`) | `claude` | | `MAX_CONCURRENT_CONVERSATIONS` | Maximum concurrent AI conversations | `10` | | `SESSION_RETENTION_DAYS` | Delete inactive sessions older than N days | `30` | +| `ARCHON_SUPPRESS_NESTED_CLAUDE_WARNING` | When set to `1`, suppresses the stderr warning emitted when `archon` is run inside a Claude Code session | -- | ### AI Providers -- Claude @@ -199,6 +200,7 @@ Environment variables override all other configuration. 
They are organized by ca | `CLAUDE_CODE_OAUTH_TOKEN` | Explicit OAuth token (alternative to global auth) | -- | | `CLAUDE_API_KEY` | Explicit API key (alternative to global auth) | -- | | `TITLE_GENERATION_MODEL` | Lightweight model for generating conversation titles | SDK default | +| `ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS` | Timeout (ms) before Claude subprocess is considered hung (throws with diagnostic log) | `60000` | When `CLAUDE_USE_GLOBAL_AUTH` is unset, Archon auto-detects: it uses explicit tokens if present, otherwise falls back to global auth. @@ -296,11 +298,11 @@ Infrastructure configuration (database URL, platform tokens) is stored in `.env` | Component | Location | Purpose | |-----------|----------|---------| -| **CLI** | `~/.archon/.env` | Global infrastructure config (only source, loaded with `override: true`) | -| **Server (dev)** | `/.env` + `~/.archon/.env` | Repo `.env` for platform tokens; `~/.archon/.env` overrides with `override: true` | +| **CLI** | `~/.archon/.env` | Global infrastructure config; CWD .env keys stripped first, then loaded with `override: true` (Archon config wins over shell-inherited vars) | +| **Server (dev)** | `/.env` + `~/.archon/.env` | Repo `.env` for platform tokens; `~/.archon/.env` loaded with `override: true` | | **Server (binary)** | `~/.archon/.env` | Single source of truth (repo `.env` path is not available in compiled binaries) | -**How it works**: Both the CLI and server load `~/.archon/.env` with `override: true`, so Archon's own config always wins over any env vars Bun auto-loads from the current working directory. Target repo env vars remain in `process.env` but cannot reach AI subprocesses — `SUBPROCESS_ENV_ALLOWLIST` blocks all non-whitelisted keys. 
+**How it works**: At startup, the CLI and server strip all keys that Bun auto-loaded from the current working directory (`.env`, `.env.local`, `.env.development`, `.env.production`) and any nested Claude Code session markers (`CLAUDECODE`, `CLAUDE_CODE_*` except auth vars) before loading `~/.archon/.env`. This ensures target repo keys and nested-session guards are fully removed from `process.env` before any application code runs. **Best practice**: Use `~/.archon/.env` as the single source of truth: diff --git a/packages/docs-web/src/content/docs/reference/security.md b/packages/docs-web/src/content/docs/reference/security.md index 26e26d169a..4a2907d855 100644 --- a/packages/docs-web/src/content/docs/reference/security.md +++ b/packages/docs-web/src/content/docs/reference/security.md @@ -119,13 +119,14 @@ The GitHub and Gitea adapters verify webhook signatures to ensure payloads origi - Never commit `.env` files to git. The repository's `.gitignore` excludes them. **Subprocess env isolation:** -- Bun auto-loads `.env` from CWD before any Archon code runs. These vars remain in the server/CLI's `process.env` but **cannot reach AI subprocesses** — Claude Code subprocesses receive only an explicit allowlist of env vars (`SUBPROCESS_ENV_ALLOWLIST`: system essentials, Claude auth, Archon runtime config, git identity, GitHub tokens). Keys like `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, and `DATABASE_URL` are not on the allowlist and are blocked. -- `~/.archon/.env` is loaded with `override: true`, so Archon's own config always wins over any Bun-auto-loaded CWD vars for overlapping keys. -- Per-codebase env vars configured via `codebase_env_vars` or `.archon/config.yaml` `env:` are merged on top of this filtered base at workflow execution time. 
+- At startup, `stripCwdEnv()` removes **all** keys that Bun auto-loaded from the CWD `.env` files, plus nested Claude Code session markers (`CLAUDECODE`, `CLAUDE_CODE_*` except auth vars) and debugger vars (`NODE_OPTIONS`, `VSCODE_INSPECTOR_OPTIONS`). This runs before any module reads `process.env`. +- `~/.archon/.env` is then loaded as the trusted source of Archon configuration. All keys the user sets there pass through to subprocesses — there is no allowlist filtering. The user controls this file and all keys are intentional. +- Per-codebase env vars configured via `codebase_env_vars` or `.archon/config.yaml` `env:` are merged on top at workflow execution time. +- CWD `.env` keys are the **only** untrusted source. They belong to the target project, not to Archon. ### Env-leak gate (target repo `.env` keys) -Beyond the subprocess allowlist, Archon also scans target repos for sensitive keys **before spawning**. A Claude or Codex subprocess started with `cwd=/path/to/target/repo` inherits its own Bun auto-loaded `.env` — the env-leak gate catches this by scanning the target repo's `.env` files at registration and pre-spawn time. +As a second layer of defense, Archon scans target repos for sensitive keys **before spawning** AI subprocesses. A Claude or Codex subprocess started with `cwd=/path/to/target/repo` inherits Bun's auto-loaded `.env` from that CWD — the env-leak gate catches this by scanning the target repo's `.env` files at registration and pre-spawn time. **What Archon scans:** auto-loaded filenames `.env`, `.env.local`, `.env.development`, `.env.production`, `.env.development.local`, `.env.production.local`. 
diff --git a/packages/docs-web/src/content/docs/reference/troubleshooting.md b/packages/docs-web/src/content/docs/reference/troubleshooting.md index 8c6d1527ac..50805c7911 100644 --- a/packages/docs-web/src/content/docs/reference/troubleshooting.md +++ b/packages/docs-web/src/content/docs/reference/troubleshooting.md @@ -279,3 +279,23 @@ docker compose exec app ls -la /.archon/workspaces ```bash docker compose exec app git clone https://github.com/user/repo /.archon/workspaces/test-repo ``` + +## Workflows Hang Silently When Run Inside Claude Code + +**Symptom:** Workflows started from within a Claude Code session (e.g., via the Terminal tool) produce no output, or the CLI emits a warning about `CLAUDECODE=1` before the workflow hangs. + +**Cause:** Nested Claude Code sessions can deadlock — the outer session waits for tool results that the inner session never delivers. + +**Fix:** Run `archon serve` from a regular shell outside Claude Code and use the Web UI or HTTP API instead. + +**Suppress the warning:** If you have a non-deadlocking setup and want to silence the warning: + +```bash +ARCHON_SUPPRESS_NESTED_CLAUDE_WARNING=1 archon workflow run ... +``` + +**Adjust the timeout:** If your environment is slow and hitting the 60-second first-event timeout: + +```bash +ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS=120000 archon workflow run ... 
+``` diff --git a/packages/paths/package.json b/packages/paths/package.json index 047f1e87c6..0f366d98c3 100644 --- a/packages/paths/package.json +++ b/packages/paths/package.json @@ -5,13 +5,16 @@ "main": "./src/index.ts", "types": "./src/index.ts", "exports": { - ".": "./src/index.ts" + ".": "./src/index.ts", + "./strip-cwd-env": "./src/strip-cwd-env.ts", + "./strip-cwd-env-boot": "./src/strip-cwd-env-boot.ts" }, "scripts": { "test": "bun test src/", "type-check": "bun x tsc --noEmit" }, "dependencies": { + "dotenv": "^17", "pino": "^9", "pino-pretty": "^13" }, diff --git a/packages/paths/src/env-integration.test.ts b/packages/paths/src/env-integration.test.ts new file mode 100644 index 0000000000..0654c1a22e --- /dev/null +++ b/packages/paths/src/env-integration.test.ts @@ -0,0 +1,208 @@ +/** + * Integration tests for the env isolation flow: + * Bun auto-load (simulated) → stripCwdEnv() → ~/.archon/.env load → subprocess env + * + * Tests the full user scenario: what keys reach the Claude subprocess when the + * user has various combinations of CWD .env, ~/.archon/.env, and shell env? + * + * Note: We can't actually test Bun's auto-load (it runs before any code), so we + * simulate it by setting process.env keys before calling stripCwdEnv(). This is + * equivalent — Bun's auto-load just does process.env[key] = value, same as us. 
+ */ +import { describe, it, expect, beforeEach, afterEach } from 'bun:test'; +import { writeFileSync, mkdirSync, rmSync } from 'fs'; +import { join } from 'path'; +import { config } from 'dotenv'; +import { stripCwdEnv } from './strip-cwd-env'; + +// Track all test keys so afterEach can clean them up reliably +const TEST_KEYS = [ + 'ANTHROPIC_API_KEY', + 'CLAUDE_API_KEY', + 'CLAUDE_CODE_OAUTH_TOKEN', + 'CLAUDE_USE_GLOBAL_AUTH', + 'DATABASE_URL', + 'LOG_LEVEL', + 'CWD_ONLY_KEY', + 'ARCHON_ONLY_KEY', + 'SHARED_KEY', + 'MY_SECRET_TOKEN', + 'CLAUDECODE', + 'CLAUDE_CODE_ENTRYPOINT', + 'NODE_OPTIONS', + 'REDIS_URL', +]; + +describe('env isolation integration', () => { + const cwdDir = join(import.meta.dir, '__env-integration-cwd__'); + const archonDir = join(import.meta.dir, '__env-integration-archon__'); + let savedEnv: Record; + + beforeEach(() => { + // Save original env state + savedEnv = {}; + for (const key of TEST_KEYS) { + savedEnv[key] = process.env[key]; + } + mkdirSync(cwdDir, { recursive: true }); + mkdirSync(archonDir, { recursive: true }); + }); + + afterEach(() => { + // Restore original env + for (const key of TEST_KEYS) { + if (savedEnv[key] !== undefined) { + process.env[key] = savedEnv[key]; + } else { + delete process.env[key]; + } + } + rmSync(cwdDir, { recursive: true, force: true }); + rmSync(archonDir, { recursive: true, force: true }); + }); + + /** + * Simulate the full entry-point flow: + * 1. "Bun auto-load" (set CWD .env keys in process.env) + * 2. stripCwdEnv() (remove CWD keys + markers) + * 3. Load ~/.archon/.env (dotenv.config) + * 4. 
Return process.env snapshot (what buildSubprocessEnv would return) + */ + function simulateEntryPointFlow(cwdEnv: string, archonEnv: string): NodeJS.ProcessEnv { + // Write the CWD .env file + writeFileSync(join(cwdDir, '.env'), cwdEnv); + + // Simulate Bun auto-load: parse CWD .env and set in process.env + const cwdParsed = config({ path: join(cwdDir, '.env'), processEnv: {} }); + if (cwdParsed.parsed) { + for (const [key, value] of Object.entries(cwdParsed.parsed)) { + process.env[key] = value; + } + } + + // Step 2: stripCwdEnv (same as entry point) + stripCwdEnv(cwdDir); + + // Step 3: Load ~/.archon/.env with override — user's Archon config wins + // over any shell-inherited vars (same as real entry point). + writeFileSync(join(archonDir, '.env'), archonEnv); + config({ path: join(archonDir, '.env'), override: true }); + + // Step 4: Return subprocess env snapshot + return { ...process.env }; + } + + it('scenario 1: global auth user with ANTHROPIC_API_KEY in CWD .env — CWD key stripped', () => { + // User ran `claude /login` (global auth). Target repo has ANTHROPIC_API_KEY + // in its .env. That key must NOT reach the subprocess. 
+ const subprocessEnv = simulateEntryPointFlow( + 'ANTHROPIC_API_KEY=sk-target-repo-leaked\nDATABASE_URL=postgres://target/db\n', + 'CLAUDE_USE_GLOBAL_AUTH=true\n' + ); + + expect(subprocessEnv.ANTHROPIC_API_KEY).toBeUndefined(); + expect(subprocessEnv.DATABASE_URL).toBeUndefined(); + expect(subprocessEnv.CLAUDE_USE_GLOBAL_AUTH).toBe('true'); + }); + + it('scenario 2: user has OAuth token in archon env + random key in CWD .env — CWD stripped, archon kept', () => { + const subprocessEnv = simulateEntryPointFlow( + 'CWD_ONLY_KEY=from-target-repo\nLOG_LEVEL=debug\n', + 'CLAUDE_CODE_OAUTH_TOKEN=sk-ant-oat01-my-token\nCLAUDE_USE_GLOBAL_AUTH=false\n' + ); + + // CWD keys must be gone + expect(subprocessEnv.CWD_ONLY_KEY).toBeUndefined(); + expect(subprocessEnv.LOG_LEVEL).toBeUndefined(); + // Archon keys must be present + expect(subprocessEnv.CLAUDE_CODE_OAUTH_TOKEN).toBe('sk-ant-oat01-my-token'); + expect(subprocessEnv.CLAUDE_USE_GLOBAL_AUTH).toBe('false'); + }); + + it('scenario 3: nothing from CWD .env leaks to subprocess', () => { + const subprocessEnv = simulateEntryPointFlow( + 'MY_SECRET_TOKEN=leaked\nDATABASE_URL=postgres://wrong/db\nLOG_LEVEL=trace\nANTHROPIC_API_KEY=sk-wrong-key\n', + 'ARCHON_ONLY_KEY=trusted\n' + ); + + // ALL CWD keys must be gone + expect(subprocessEnv.MY_SECRET_TOKEN).toBeUndefined(); + expect(subprocessEnv.DATABASE_URL).toBeUndefined(); + expect(subprocessEnv.LOG_LEVEL).toBeUndefined(); + expect(subprocessEnv.ANTHROPIC_API_KEY).toBeUndefined(); + // Archon key present + expect(subprocessEnv.ARCHON_ONLY_KEY).toBe('trusted'); + // Shell-inherited keys present + expect(subprocessEnv.PATH).toBeDefined(); + expect(subprocessEnv.HOME).toBeDefined(); + }); + + it('scenario 4: same key in both CWD and archon env — archon value wins', () => { + // User has ANTHROPIC_API_KEY in both places. CWD one is the target repo's, + // archon one is the user's intentional config. Archon must win. 
+ const subprocessEnv = simulateEntryPointFlow( + 'ANTHROPIC_API_KEY=sk-target-repo-WRONG\nSHARED_KEY=cwd-value\n', + 'ANTHROPIC_API_KEY=sk-my-real-key\nSHARED_KEY=archon-value\n' + ); + + // Archon value wins (CWD was stripped, then archon loaded) + expect(subprocessEnv.ANTHROPIC_API_KEY).toBe('sk-my-real-key'); + expect(subprocessEnv.SHARED_KEY).toBe('archon-value'); + }); + + it('CLAUDECODE markers stripped even if not from CWD .env', () => { + // Simulating: parent Claude Code shell sets CLAUDECODE=1 + // (not from .env file, from inherited shell env) + process.env.CLAUDECODE = '1'; + process.env.CLAUDE_CODE_ENTRYPOINT = 'cli'; + process.env.NODE_OPTIONS = '--inspect'; + + const subprocessEnv = simulateEntryPointFlow('', ''); + + expect(subprocessEnv.CLAUDECODE).toBeUndefined(); + expect(subprocessEnv.CLAUDE_CODE_ENTRYPOINT).toBeUndefined(); + expect(subprocessEnv.NODE_OPTIONS).toBeUndefined(); + }); + + it('scenario 5: DATABASE_URL in CWD .env does not reach Archon — archon uses its own DB', () => { + // Target repo has DATABASE_URL for its own PostgreSQL. Archon must NOT + // connect to the target app's database — it should use its own DB + // (from ~/.archon/.env or default SQLite). + const subprocessEnv = simulateEntryPointFlow( + 'DATABASE_URL=postgresql://target-app:5432/wrong_db\nREDIS_URL=redis://target:6379\n', + 'DATABASE_URL=sqlite:///Users/me/.archon/archon.db\n' + ); + + // CWD DATABASE_URL is stripped, archon's wins + expect(subprocessEnv.DATABASE_URL).toBe('sqlite:///Users/me/.archon/archon.db'); + // Other CWD keys also stripped + expect(subprocessEnv.REDIS_URL).toBeUndefined(); + }); + + it('scenario 6: DATABASE_URL in CWD .env only (no archon env) — stripped entirely', () => { + // User relies on default SQLite (no DATABASE_URL in ~/.archon/.env). + // Target repo's DATABASE_URL must not leak. 
+ const subprocessEnv = simulateEntryPointFlow( + 'DATABASE_URL=postgresql://target-app:5432/production\n', + '' + ); + + expect(subprocessEnv.DATABASE_URL).toBeUndefined(); + }); + + it('CLAUDE_CODE_OAUTH_TOKEN from archon env survives marker strip', () => { + // CLAUDE_CODE_* markers are stripped, but CLAUDE_CODE_OAUTH_TOKEN is + // an auth var and must be preserved. + process.env.CLAUDECODE = '1'; + process.env.CLAUDE_CODE_ENTRYPOINT = 'cli'; + + const subprocessEnv = simulateEntryPointFlow( + '', + 'CLAUDE_CODE_OAUTH_TOKEN=sk-ant-oat01-keep-this\n' + ); + + expect(subprocessEnv.CLAUDECODE).toBeUndefined(); + expect(subprocessEnv.CLAUDE_CODE_ENTRYPOINT).toBeUndefined(); + expect(subprocessEnv.CLAUDE_CODE_OAUTH_TOKEN).toBe('sk-ant-oat01-keep-this'); + }); +}); diff --git a/packages/paths/src/strip-cwd-env-boot.ts b/packages/paths/src/strip-cwd-env-boot.ts new file mode 100644 index 0000000000..e8125244bc --- /dev/null +++ b/packages/paths/src/strip-cwd-env-boot.ts @@ -0,0 +1,13 @@ +/** + * Side-effect entry point: strips Bun-auto-loaded CWD .env keys at import time. + * + * Import this as the FIRST import in CLI entry points so it runs + * before any module that reads process.env at initialization time. 
+ * + * @example + * // packages/cli/src/cli.ts — must be the very first import + * import '@archon/paths/strip-cwd-env-boot'; + */ +import { stripCwdEnv } from './strip-cwd-env'; + +stripCwdEnv(); diff --git a/packages/paths/src/strip-cwd-env.test.ts b/packages/paths/src/strip-cwd-env.test.ts new file mode 100644 index 0000000000..9576f0aa0a --- /dev/null +++ b/packages/paths/src/strip-cwd-env.test.ts @@ -0,0 +1,159 @@ +import { describe, it, expect, beforeEach, afterEach } from 'bun:test'; +import { writeFileSync, mkdirSync, rmSync } from 'fs'; +import { join } from 'path'; +import { stripCwdEnv } from './strip-cwd-env'; + +describe('stripCwdEnv', () => { + const tmpDir = join(import.meta.dir, '__strip-cwd-env-test-tmp__'); + + beforeEach(() => { + mkdirSync(tmpDir, { recursive: true }); + }); + + afterEach(() => { + rmSync(tmpDir, { recursive: true, force: true }); + delete process.env.TEST_STRIP_KEY; + delete process.env.TEST_STRIP_KEY2; + delete process.env.TEST_STRIP_KEY_A; + delete process.env.TEST_STRIP_KEY_B; + // Clean up nested-session marker test keys + delete process.env.CLAUDECODE; + delete process.env.CLAUDE_CODE_ENTRYPOINT; + delete process.env.CLAUDE_CODE_EXECPATH; + delete process.env.CLAUDE_CODE_NO_FLICKER; + delete process.env.CLAUDE_CODE_OAUTH_TOKEN; + delete process.env.CLAUDE_CODE_USE_BEDROCK; + delete process.env.CLAUDE_CODE_USE_VERTEX; + delete process.env.NODE_OPTIONS; + delete process.env.VSCODE_INSPECTOR_OPTIONS; + }); + + it('strips keys from single .env file', () => { + writeFileSync(join(tmpDir, '.env'), 'TEST_STRIP_KEY=leaked\n'); + process.env.TEST_STRIP_KEY = 'leaked'; + stripCwdEnv(tmpDir); + expect(process.env.TEST_STRIP_KEY).toBeUndefined(); + }); + + it('strips keys from all four Bun-auto-loaded files', () => { + for (const f of ['.env', '.env.local', '.env.development', '.env.production']) { + writeFileSync(join(tmpDir, f), 'TEST_STRIP_KEY=leaked\n'); + } + process.env.TEST_STRIP_KEY = 'leaked'; + stripCwdEnv(tmpDir); + 
expect(process.env.TEST_STRIP_KEY).toBeUndefined(); + }); + + it('does nothing when no CWD .env files exist', () => { + process.env.TEST_STRIP_KEY = 'safe'; + stripCwdEnv(tmpDir); + expect(process.env.TEST_STRIP_KEY).toBe('safe'); + }); + + it('preserves keys not in CWD .env files', () => { + writeFileSync(join(tmpDir, '.env'), 'TEST_STRIP_KEY=leaked\n'); + process.env.TEST_STRIP_KEY = 'leaked'; + process.env.TEST_STRIP_KEY2 = 'preserved'; + stripCwdEnv(tmpDir); + expect(process.env.TEST_STRIP_KEY).toBeUndefined(); + expect(process.env.TEST_STRIP_KEY2).toBe('preserved'); + }); + + it('tolerates malformed .env lines', () => { + writeFileSync(join(tmpDir, '.env'), 'NOTAKEYVALUE\nTEST_STRIP_KEY=leaked\n'); + process.env.TEST_STRIP_KEY = 'leaked'; + stripCwdEnv(tmpDir); + expect(process.env.TEST_STRIP_KEY).toBeUndefined(); + }); + + it('does not delete key if it was not in process.env (no-op)', () => { + writeFileSync(join(tmpDir, '.env'), 'TEST_STRIP_KEY=parsed\n'); + // Do NOT set process.env.TEST_STRIP_KEY — simulate key parsed but not auto-loaded + stripCwdEnv(tmpDir); + expect(process.env.TEST_STRIP_KEY).toBeUndefined(); // still undefined, no error + }); + + it('strips distinct keys from different .env files', () => { + writeFileSync(join(tmpDir, '.env'), 'TEST_STRIP_KEY_A=leaked\n'); + writeFileSync(join(tmpDir, '.env.local'), 'TEST_STRIP_KEY_B=leaked\n'); + process.env.TEST_STRIP_KEY_A = 'leaked'; + process.env.TEST_STRIP_KEY_B = 'leaked'; + stripCwdEnv(tmpDir); + expect(process.env.TEST_STRIP_KEY_A).toBeUndefined(); + expect(process.env.TEST_STRIP_KEY_B).toBeUndefined(); + }); +}); + +describe('stripCwdEnv — nested Claude Code marker stripping', () => { + const tmpDir = join(import.meta.dir, '__strip-markers-test-tmp__'); + + beforeEach(() => { + mkdirSync(tmpDir, { recursive: true }); + }); + + afterEach(() => { + rmSync(tmpDir, { recursive: true, force: true }); + delete process.env.CLAUDECODE; + delete process.env.CLAUDE_CODE_ENTRYPOINT; + delete 
process.env.CLAUDE_CODE_EXECPATH; + delete process.env.CLAUDE_CODE_NO_FLICKER; + delete process.env.CLAUDE_CODE_HIDE_ACCOUNT_INFO; + delete process.env.CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS; + delete process.env.CLAUDE_CODE_OAUTH_TOKEN; + delete process.env.CLAUDE_CODE_USE_BEDROCK; + delete process.env.CLAUDE_CODE_USE_VERTEX; + delete process.env.NODE_OPTIONS; + delete process.env.VSCODE_INSPECTOR_OPTIONS; + }); + + it('strips CLAUDECODE from process.env', () => { + process.env.CLAUDECODE = '1'; + stripCwdEnv(tmpDir); + expect(process.env.CLAUDECODE).toBeUndefined(); + }); + + it('strips CLAUDE_CODE_* session markers', () => { + process.env.CLAUDE_CODE_ENTRYPOINT = 'cli'; + process.env.CLAUDE_CODE_EXECPATH = '/usr/local/bin/claude'; + process.env.CLAUDE_CODE_NO_FLICKER = '1'; + process.env.CLAUDE_CODE_HIDE_ACCOUNT_INFO = '1'; + process.env.CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS = '1'; + stripCwdEnv(tmpDir); + expect(process.env.CLAUDE_CODE_ENTRYPOINT).toBeUndefined(); + expect(process.env.CLAUDE_CODE_EXECPATH).toBeUndefined(); + expect(process.env.CLAUDE_CODE_NO_FLICKER).toBeUndefined(); + expect(process.env.CLAUDE_CODE_HIDE_ACCOUNT_INFO).toBeUndefined(); + expect(process.env.CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS).toBeUndefined(); + }); + + it('preserves CLAUDE_CODE_* auth vars', () => { + process.env.CLAUDE_CODE_OAUTH_TOKEN = 'sk-ant-oat01-secret'; + process.env.CLAUDE_CODE_USE_BEDROCK = '1'; + process.env.CLAUDE_CODE_USE_VERTEX = '1'; + stripCwdEnv(tmpDir); + expect(process.env.CLAUDE_CODE_OAUTH_TOKEN).toBe('sk-ant-oat01-secret'); + expect(process.env.CLAUDE_CODE_USE_BEDROCK).toBe('1'); + expect(process.env.CLAUDE_CODE_USE_VERTEX).toBe('1'); + }); + + it('strips NODE_OPTIONS and VSCODE_INSPECTOR_OPTIONS', () => { + process.env.NODE_OPTIONS = '--inspect'; + process.env.VSCODE_INSPECTOR_OPTIONS = '{"port":9229}'; + stripCwdEnv(tmpDir); + expect(process.env.NODE_OPTIONS).toBeUndefined(); + expect(process.env.VSCODE_INSPECTOR_OPTIONS).toBeUndefined(); + }); + + 
it('handles combined CWD .env + nested session markers in one call', () => { + writeFileSync(join(tmpDir, '.env'), 'TEST_STRIP_KEY=leaked\n'); + process.env.TEST_STRIP_KEY = 'leaked'; + process.env.CLAUDECODE = '1'; + process.env.CLAUDE_CODE_ENTRYPOINT = 'cli'; + process.env.CLAUDE_CODE_OAUTH_TOKEN = 'keep-me'; + stripCwdEnv(tmpDir); + expect(process.env.TEST_STRIP_KEY).toBeUndefined(); + expect(process.env.CLAUDECODE).toBeUndefined(); + expect(process.env.CLAUDE_CODE_ENTRYPOINT).toBeUndefined(); + expect(process.env.CLAUDE_CODE_OAUTH_TOKEN).toBe('keep-me'); + }); +}); diff --git a/packages/paths/src/strip-cwd-env.ts b/packages/paths/src/strip-cwd-env.ts new file mode 100644 index 0000000000..17c4a3c903 --- /dev/null +++ b/packages/paths/src/strip-cwd-env.ts @@ -0,0 +1,94 @@ +/** + * Cleans process.env at startup — BEFORE any module reads env at init time + * (notably `@archon/paths/logger` which reads `LOG_LEVEL` during module load). + * + * Two concerns handled in one pass: + * + * 1. CWD .env leak: Bun unconditionally loads .env / .env.local / + * .env.development / .env.production from CWD before any user code runs. + * When `archon` is invoked from inside a target repo, that repo's env vars + * leak into the Archon process. `override: true` in dotenv only fixes keys + * that exist in both files — keys that only appear in the target repo's .env + * survive unaffected. We strip them. + * + * 2. Nested Claude Code session markers: When archon is launched from inside a + * Claude Code terminal, the parent shell exports CLAUDECODE=1 and several + * CLAUDE_CODE_* markers. The Claude Agent SDK leaks process.env into the + * spawned child regardless of the explicit `env` option + * (see coleam00/Archon#1097), so the only way to prevent the nested-session + * deadlock is to delete the markers from process.env at the entry point. + * Auth vars (CLAUDE_CODE_OAUTH_TOKEN, CLAUDE_CODE_USE_BEDROCK, + * CLAUDE_CODE_USE_VERTEX) are kept. 
/**
 * CWD env stripper: removes from process.env every key declared in the .env
 * files Bun auto-loads from the working directory, plus nested Claude Code
 * session markers and debugger-related vars, before other modules read env.
 */
import { config } from 'dotenv';
import { resolve } from 'path';

/** The four filenames Bun auto-loads from CWD (in loading order). */
const BUN_AUTO_LOADED_ENV_FILES = ['.env', '.env.local', '.env.development', '.env.production'];

/** CLAUDE_CODE_* vars that are auth-related and must be kept in process.env. */
const CLAUDE_CODE_AUTH_VARS = new Set<string>([
  'CLAUDE_CODE_OAUTH_TOKEN',
  'CLAUDE_CODE_USE_BEDROCK',
  'CLAUDE_CODE_USE_VERTEX',
]);

/**
 * Collects the keys declared in the Bun auto-loaded .env files under `cwd`.
 *
 * Files that do not exist are skipped silently; any other read/parse error is
 * reported on stderr and that file contributes no keys.
 *
 * @param cwd Directory whose .env files are inspected.
 * @returns The union of keys found across all readable files.
 */
function collectCwdEnvKeys(cwd: string): Set<string> {
  const keys = new Set<string>();

  for (const filename of BUN_AUTO_LOADED_ENV_FILES) {
    const filepath = resolve(cwd, filename);
    // processEnv: {} makes dotenv populate a throwaway object instead of
    // process.env, so this is parse-only. quiet: true suppresses dotenv's
    // informational runtime log line, which would otherwise be printed once
    // per file on every startup.
    const result = config({ path: filepath, processEnv: {}, quiet: true });

    if (result.error) {
      // ENOENT is expected (file simply doesn't exist) — all others are unexpected
      const code = (result.error as NodeJS.ErrnoException).code;
      if (code !== 'ENOENT') {
        process.stderr.write(
          `[archon] Warning: could not parse ${filepath} for CWD env stripping: ${result.error.message}\n`
        );
      }
      continue;
    }

    for (const key of Object.keys(result.parsed ?? {})) {
      keys.add(key);
    }
  }

  return keys;
}

/**
 * Warns about, then removes, nested Claude Code session markers.
 *
 * CLAUDE_CODE_* keys are matched by prefix (not hardcoded) so markers added by
 * future Claude Code versions are handled too; the auth-related vars listed in
 * CLAUDE_CODE_AUTH_VARS are kept.
 */
function stripNestedClaudeMarkers(): void {
  // Emit the warning BEFORE deleting — downstream code won't see CLAUDECODE=1.
  if (process.env.CLAUDECODE === '1' && !process.env.ARCHON_SUPPRESS_NESTED_CLAUDE_WARNING) {
    process.stderr.write(
      '\u26a0 Detected CLAUDECODE=1 \u2014 running inside a Claude Code session.\n' +
        ' If workflows hang silently, this is a known class of issue.\n' +
        ' Workaround: run `archon serve` from a regular shell.\n' +
        ' Suppress: set ARCHON_SUPPRESS_NESTED_CLAUDE_WARNING=1\n' +
        ' Details: https://github.com/coleam00/Archon/issues/1067\n'
    );
  }

  // Deleting an absent key is a no-op, so no guard is needed; deleting
  // unconditionally also removes a marker that is set to an empty string.
  Reflect.deleteProperty(process.env, 'CLAUDECODE');

  // Object.keys() returns a snapshot, so deleting while iterating is safe.
  for (const key of Object.keys(process.env)) {
    if (key.startsWith('CLAUDE_CODE_') && !CLAUDE_CODE_AUTH_VARS.has(key)) {
      Reflect.deleteProperty(process.env, key);
    }
  }
}

/**
 * Strip CWD .env keys and nested Claude Code session markers from process.env.
 *
 * Three steps, in order:
 *  1. Delete every key declared in the CWD .env files Bun auto-loads. The key
 *     is removed regardless of its current value in process.env.
 *  2. Delete CLAUDECODE and all non-auth CLAUDE_CODE_* markers (after emitting
 *     the nested-session warning, unless suppressed).
 *  3. Delete NODE_OPTIONS and VSCODE_INSPECTOR_OPTIONS.
 *
 * Keys in ~/.archon/.env (loaded afterward by each entry point) are unaffected.
 * Safe to call even when no CWD .env files exist.
 *
 * @param cwd Directory to scan for .env files; defaults to process.cwd().
 */
export function stripCwdEnv(cwd: string = process.cwd()): void {
  // --- Pass 1: CWD .env files ---
  for (const key of collectCwdEnvKeys(cwd)) {
    Reflect.deleteProperty(process.env, key);
  }

  // --- Pass 2: Nested Claude Code session markers ---
  stripNestedClaudeMarkers();

  // Strip debugger vars that crash Claude Code subprocesses
  // See: https://github.com/anthropics/claude-code/issues/4619
  Reflect.deleteProperty(process.env, 'NODE_OPTIONS');
  Reflect.deleteProperty(process.env, 'VSCODE_INSPECTOR_OPTIONS');
}
+import '@archon/paths/strip-cwd-env-boot'; + +// Load environment variables — after CWD stripping, before application imports. import { config } from 'dotenv'; import { resolve } from 'path'; import { existsSync } from 'fs'; @@ -39,6 +40,9 @@ if (existsSync(globalEnvPath)) { } } +// CLAUDECODE=1 warning is emitted inside stripCwdEnv() (boot import above) +// BEFORE the marker is deleted from process.env. No duplicate warning here. + // Smart default: use Claude Code's built-in OAuth if no explicit credentials if ( !process.env.CLAUDE_API_KEY &&