diff --git a/.github/agents/registry.yml b/.github/agents/registry.yml new file mode 100644 index 00000000..2b6def67 --- /dev/null +++ b/.github/agents/registry.yml @@ -0,0 +1,16 @@ +version: 1 + +default_agent: codex + +agents: + codex: + runner_workflow: .github/workflows/reusable-codex-run.yml + required_secrets: + - CODEX_AUTH_JSON + branch_prefix: codex/issue- + ui_mentions_allowed: false + capabilities: + pr_keepalive: true + pr_autofix: true + belt: true + verifier_checkbox: true diff --git a/.github/scripts/agent_registry.js b/.github/scripts/agent_registry.js new file mode 100644 index 00000000..49323108 --- /dev/null +++ b/.github/scripts/agent_registry.js @@ -0,0 +1,245 @@ +'use strict'; + +const fs = require('node:fs'); + +function stripTrailingComment(rawLine) { + const line = String(rawLine ?? ''); + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith('#')) { + return ''; + } + + // Keep this intentionally simple: our registry YAML should not rely on inline comments. + const match = line.match(/^(.*?)(\s+#.*)?$/); + return (match?.[1] ?? line).replace(/\s+$/, ''); +} + +function parseScalar(value) { + const raw = String(value ?? '').trim(); + if (!raw) { + return ''; + } + + if (raw === 'true') { + return true; + } + if (raw === 'false') { + return false; + } + + if (/^-?\d+$/.test(raw)) { + return Number(raw); + } + + const quoted = raw.match(/^(['"])(.*)\1$/); + if (quoted) { + return quoted[2]; + } + + return raw; +} + +function countIndent(line) { + // Match all leading horizontal whitespace (spaces and tabs). + const match = String(line).match(/^([ \t]*)/); + const indentPrefix = match?.[1] ?? 
''; + if (indentPrefix.includes('\t')) { + throw new Error('Registry YAML must use spaces only (tabs are not allowed)'); + } + if (indentPrefix.length % 2 !== 0) { + throw new Error( + `Registry YAML indentation must be multiples of 2 spaces (got ${indentPrefix.length})`, + ); + } + return indentPrefix.length; +} + +function findNextMeaningfulLine(lines, startIndex) { + for (let index = startIndex; index < lines.length; index += 1) { + const stripped = stripTrailingComment(lines[index]); + if (!stripped.trim()) { + continue; + } + return { + index, + indent: countIndent(stripped), + trimmed: stripped.trim(), + }; + } + return null; +} + +// Minimal YAML parser for the registry file. +// Supported features: +// - nested mappings via indentation (2 spaces) +// - scalar values (strings, booleans, integers) +// - sequences using "- item" lines (scalar items only) +// Unsupported (intentionally): anchors, multiline strings, flow maps, complex quoting. +function parseRegistryYaml(text) { + const rawLines = String(text ?? 
'').split(/\r?\n/); + const lines = rawLines.map(stripTrailingComment); + + const root = {}; + const stack = [{ indent: -1, container: root }]; + + for (let lineIndex = 0; lineIndex < lines.length; lineIndex += 1) { + const rawLine = lines[lineIndex]; + if (!rawLine.trim()) { + continue; + } + + const indent = countIndent(rawLine); + const trimmed = rawLine.trim(); + + while (stack.length > 1 && indent <= stack[stack.length - 1].indent) { + stack.pop(); + } + + const parent = stack[stack.length - 1].container; + + if (trimmed.startsWith('- ')) { + if (!Array.isArray(parent)) { + throw new Error(`Unexpected list item at line ${lineIndex + 1}; parent is not a list`); + } + parent.push(parseScalar(trimmed.slice(2))); + continue; + } + + const sepIndex = trimmed.indexOf(':'); + if (sepIndex <= 0) { + throw new Error(`Invalid registry YAML line ${lineIndex + 1}: expected "key: value"`); + } + + const key = trimmed.slice(0, sepIndex).trim(); + const rest = trimmed.slice(sepIndex + 1).trim(); + + if (!key) { + throw new Error(`Invalid registry YAML line ${lineIndex + 1}: empty key`); + } + if (typeof parent !== 'object' || parent === null || Array.isArray(parent)) { + throw new Error(`Invalid registry YAML line ${lineIndex + 1}: cannot assign key under a list`); + } + + if (rest) { + parent[key] = parseScalar(rest); + continue; + } + + const next = findNextMeaningfulLine(lines, lineIndex + 1); + const shouldBeList = Boolean(next && next.indent > indent && next.trimmed.startsWith('- ')); + const child = shouldBeList ? 
[] : {}; + parent[key] = child; + stack.push({ indent, container: child }); + } + + return root; +} + +function loadAgentRegistry({ registryPath } = {}) { + const path = registryPath || '.github/agents/registry.yml'; + const raw = fs.readFileSync(path, 'utf8'); + const registry = parseRegistryYaml(raw); + if (!registry || typeof registry !== 'object') { + throw new Error('Agent registry did not parse into an object'); + } + if (!registry.agents || typeof registry.agents !== 'object') { + throw new Error('Agent registry missing required "agents" mapping'); + } + if (!registry.default_agent || typeof registry.default_agent !== 'string') { + throw new Error('Agent registry missing required "default_agent" string'); + } + return registry; +} + +function normalizeLabel(label) { + if (!label) { + return ''; + } + if (typeof label === 'string') { + return label.trim().toLowerCase(); + } + if (typeof label === 'object' && typeof label.name === 'string') { + return label.name.trim().toLowerCase(); + } + return ''; +} + +function resolveAgentRoutingFromLabels(labels, { registryPath } = {}) { + const registry = loadAgentRegistry({ registryPath }); + const labelList = Array.isArray(labels) ? 
labels : []; + const agentLabels = labelList + .map(normalizeLabel) + .filter(Boolean) + .filter((value) => value.startsWith('agent:')); + + const requestedAgents = agentLabels.map((value) => value.slice('agent:'.length)); + const uniqueRequested = new Set(requestedAgents); + const hasAuto = uniqueRequested.has('auto'); + const explicitRequested = Array.from(uniqueRequested).filter((value) => value !== 'auto'); + + if (explicitRequested.length > 1) { + throw new Error(`Multiple agent labels present: ${explicitRequested.join(', ')}`); + } + if (hasAuto && explicitRequested.length > 0) { + throw new Error(`Multiple agent labels present: auto, ${explicitRequested[0]}`); + } + + let mode = 'default'; + let agentKey = registry.default_agent; + let requested = null; + + if (explicitRequested.length === 1) { + mode = 'explicit'; + agentKey = explicitRequested[0]; + requested = agentKey; + } else if (hasAuto) { + mode = 'auto'; + agentKey = registry.default_agent; + requested = 'auto'; + } + + if (!registry.agents[agentKey]) { + const known = Object.keys(registry.agents).sort(); + throw new Error(`Unknown agent key: ${agentKey}. Known agents: ${known.join(', ') || '(none)'}`); + } + + return { + mode, + agentKey, + requested, + }; +} + +function resolveAgentFromLabels(labels, { registryPath } = {}) { + const routing = resolveAgentRoutingFromLabels(labels, { registryPath }); + return routing.agentKey; +} + +function getAgentConfig(agentKey, { registryPath } = {}) { + const registry = loadAgentRegistry({ registryPath }); + const key = String(agentKey || '').trim() || registry.default_agent; + const config = registry.agents[key]; + if (!config) { + const known = Object.keys(registry.agents).sort(); + throw new Error(`Unknown agent key: ${key}. 
Known agents: ${known.join(', ') || '(none)'}`); + } + return config; +} + +function getRunnerWorkflow(agentKey, { registryPath } = {}) { + const config = getAgentConfig(agentKey, { registryPath }); + const workflow = String(config.runner_workflow || '').trim(); + if (!workflow) { + throw new Error(`Agent config missing runner_workflow for agent: ${agentKey}`); + } + return workflow; +} + +module.exports = { + getAgentConfig, + getRunnerWorkflow, + loadAgentRegistry, + parseRegistryYaml, + resolveAgentFromLabels, + resolveAgentRoutingFromLabels, +}; diff --git a/.github/scripts/keepalive_loop.js b/.github/scripts/keepalive_loop.js index 96cdf80e..76483af4 100644 --- a/.github/scripts/keepalive_loop.js +++ b/.github/scripts/keepalive_loop.js @@ -1870,12 +1870,36 @@ async function evaluateKeepaliveLoop({ github: rawGithub, context, core, payload let gateRateLimit = false; const config = parseConfig(pr.body || ''); - const labels = Array.isArray(pr.labels) ? pr.labels.map((label) => normalise(label.name).toLowerCase()) : []; - - // Extract agent type from agent:* labels (supports agent:codex, agent:claude, etc.) - const agentLabel = labels.find((label) => label.startsWith('agent:')); - const agentType = agentLabel ? agentLabel.replace('agent:', '') : ''; - const hasAgentLabel = Boolean(agentType); + const labels = Array.isArray(pr.labels) + ? pr.labels.map((label) => normalise(label.name).toLowerCase()) + : []; + + // Phase 2: Resolve agent via registry helper when an explicit agent:* label is present. + // Keepalive stays opt-in: no agent label => keepalive disabled. 
+ const explicitAgentLabel = labels.find((label) => label.startsWith('agent:')); + const requestedAgentKeys = Array.from( + new Set(labels.filter((label) => label.startsWith('agent:')).map((label) => label.slice('agent:'.length))), + ); + let agentType = ''; + let hasAgentLabel = false; + if (explicitAgentLabel) { + hasAgentLabel = true; + try { + const { resolveAgentRoutingFromLabels } = require('./agent_registry.js'); + const routing = resolveAgentRoutingFromLabels(pr.labels); + agentType = routing.agentKey; + } catch (error) { + // Keep conflict states safe: do not silently route to default agent. + // If multiple agent:* labels are present, treat as invalid and disable keepalive. + if (requestedAgentKeys.length > 1) { + hasAgentLabel = false; + agentType = ''; + } else { + // Preserve the explicit requested label key (including unknown agents like "claude"). + agentType = requestedAgentKeys[0] || explicitAgentLabel.replace('agent:', ''); + } + } + } const hasHighPrivilege = labels.includes('agent-high-privilege'); const keepaliveEnabled = config.keepalive_enabled && hasAgentLabel; diff --git a/.github/workflows/agents-71-codex-belt-dispatcher.yml b/.github/workflows/agents-71-codex-belt-dispatcher.yml index 1a43c622..cdf1b1a9 100644 --- a/.github/workflows/agents-71-codex-belt-dispatcher.yml +++ b/.github/workflows/agents-71-codex-belt-dispatcher.yml @@ -5,6 +5,11 @@ name: Agents 71 Codex Belt Dispatcher on: workflow_call: inputs: + agent_key: + description: 'Agent key to dispatch (default: codex)' + required: false + default: 'codex' + type: string force_issue: description: 'Optional issue number to dispatch immediately' required: false @@ -23,6 +28,9 @@ on: WORKFLOWS_APP_PRIVATE_KEY: required: false outputs: + agent_key: + description: 'Agent key used for dispatch' + value: ${{ jobs.dispatch.outputs.agent_key }} issue: description: 'Issue selected for dispatch' value: ${{ jobs.dispatch.outputs.issue }} @@ -40,6 +48,11 @@ on: value: ${{ 
jobs.dispatch.outputs.dry_run }} workflow_dispatch: inputs: + agent_key: + description: 'Agent key to dispatch (default: codex)' + required: false + default: 'codex' + type: string force_issue: description: 'Optional issue number to dispatch immediately' required: false @@ -65,6 +78,7 @@ jobs: name: Select next Codex issue runs-on: ubuntu-latest outputs: + agent_key: ${{ steps.pick.outputs.agent_key || '' }} issue: ${{ steps.pick.outputs.issue || '' }} branch: ${{ steps.pick.outputs.branch || '' }} base: ${{ steps.pick.outputs.base || '' }} @@ -190,6 +204,8 @@ jobs: ref: ${{ steps.workflows_ref.outputs.ref }} sparse-checkout: | .github/actions/setup-api-client + .github/agents/registry.yml + .github/scripts/agent_registry.js .github/scripts/github-api-with-retry.js .github/scripts/token_load_balancer.js sparse-checkout-cone-mode: false @@ -207,10 +223,11 @@ jobs: task: 'codex-belt-dispatcher-pick', }); const forced = '${{ inputs.force_issue }}'; + const agentKey = String('${{ inputs.agent_key }}' || 'codex').trim().toLowerCase() || 'codex'; const { owner, repo } = context.repo; const summary = core.summary; - summary.addHeading('Codex Belt Dispatcher'); + summary.addHeading(`Belt Dispatcher (agent: ${agentKey})`); let issueNumber = null; let reason = ''; @@ -225,7 +242,7 @@ jobs: owner, repo, state: 'open', - labels: 'agent:codex,status:ready', + labels: `agent:${agentKey},status:ready`, sort: 'created', direction: 'asc', per_page: 30, @@ -238,7 +255,11 @@ jobs: } if (!issueNumber) { - summary.addRaw('No open issues with labels `agent:codex` and `status:ready` were found.').write(); + summary + .addRaw( + `No open issues with labels \`agent:${agentKey}\` and \`status:ready\` were found.` + ) + .write(); core.setOutput('issue', ''); core.setOutput('reason', 'empty'); return; @@ -261,8 +282,19 @@ jobs: core.setFailed('Repository default branch not available'); return; } - const branch = `codex/issue-${issueNumber}`; + let branchPrefix = 'codex/issue-'; + try { + 
const { getAgentConfig } = require('./.github/scripts/agent_registry.js'); + const cfg = getAgentConfig(agentKey); + branchPrefix = String(cfg.branch_prefix || branchPrefix); + } catch (error) { + core.warning(`Could not load agent registry; defaulting branch prefix: ${error.message}`); + } + + const branch = `${branchPrefix}${issueNumber}`; + + core.setOutput('agent_key', agentKey); core.setOutput('issue', String(issueNumber)); core.setOutput('branch', branch); core.setOutput('base', base); diff --git a/.github/workflows/agents-72-codex-belt-worker-dispatch.yml b/.github/workflows/agents-72-codex-belt-worker-dispatch.yml index cdde5ef9..543af84f 100644 --- a/.github/workflows/agents-72-codex-belt-worker-dispatch.yml +++ b/.github/workflows/agents-72-codex-belt-worker-dispatch.yml @@ -4,6 +4,11 @@ name: Agents 72 Codex Belt Worker Dispatch on: workflow_dispatch: inputs: + agent_key: + description: 'Agent key for this belt run (default: codex)' + required: false + default: 'codex' + type: string issue: description: 'Issue number' required: true @@ -54,6 +59,7 @@ jobs: name: Run Codex belt worker uses: ./.github/workflows/agents-72-codex-belt-worker.yml with: + agent_key: ${{ inputs.agent_key }} issue: ${{ inputs.issue }} branch: ${{ inputs.branch }} base: ${{ inputs.base }} diff --git a/.github/workflows/agents-72-codex-belt-worker.yml b/.github/workflows/agents-72-codex-belt-worker.yml index ab080141..a6f16874 100644 --- a/.github/workflows/agents-72-codex-belt-worker.yml +++ b/.github/workflows/agents-72-codex-belt-worker.yml @@ -5,6 +5,11 @@ name: Agents 72 Codex Belt Worker on: workflow_call: inputs: + agent_key: + description: 'Agent key for this belt run (default: codex)' + required: false + default: 'codex' + type: string issue: description: 'Issue number' required: true @@ -224,6 +229,7 @@ jobs: with: github-token: ${{ env.GH_BELT_TOKEN }} script: | + const agentKey = String('${{ inputs.agent_key }}' || 'codex').trim().toLowerCase() || 'codex'; const 
issueInput = '${{ inputs.issue }}'.trim(); const branchInput = '${{ inputs.branch }}'.trim(); const baseInput = '${{ inputs.base }}'.trim(); @@ -241,10 +247,6 @@ jobs: } let branch = branchInput; - if (!branch && issue) { - branch = `codex/issue-${issue}`; - } - let base = baseInput; let source = sourceInput; @@ -256,25 +258,29 @@ jobs: core.setFailed('Worker missing branch name.'); return; } - if (!branch.startsWith('codex/issue-')) { - core.warning(`Unexpected branch naming: ${branch}`); - } + + // Branch prefix validation is performed later after checkout (needs registry). const runId = context.runId; const concurrencyKey = branch || issue || runId; - const concurrencyGroup = concurrencyKey ? `codex-belt-${concurrencyKey}` : ''; + const concurrencyGroup = concurrencyKey + ? (agentKey === 'codex' + ? `codex-belt-${concurrencyKey}` + : `belt-${agentKey}-${concurrencyKey}`) + : ''; if (!concurrencyGroup) { core.setFailed('Unable to determine concurrency group.'); return; } + core.setOutput('agent_key', agentKey); core.setOutput('issue', String(issue)); core.setOutput('branch', branch); core.setOutput('base', base); core.setOutput('source', source); core.setOutput('concurrency_group', concurrencyGroup); core.summary - .addHeading('Codex Belt Worker') + .addHeading(`Belt Worker (agent: ${agentKey})`) .addTable([[{ data: 'Issue', header: true }, { data: 'Branch', header: true }, { data: 'Source', header: true }], [`#${issue}`, branch, source]]) .addTable([[{ data: 'Concurrency Group', header: true }, { data: 'Issue', header: true }, { data: 'Branch', header: true }], [concurrencyGroup, `#${issue}`, branch]]) .addHeading('Branch Freshness Safeguards') @@ -479,7 +485,7 @@ jobs: const { owner, repo } = context.repo; const issueBranch = '${{ steps.ctx.outputs.branch }}'; - const prefix = `codex/issue-${issue}/step/`; + const prefix = `${issueBranch}/step/`; const deleted = []; try { @@ -562,10 +568,14 @@ jobs: client.rest.issues.get({ owner, repo, issue_number: issue }) 
); - const labelNames = Array.isArray(data.labels) ? data.labels.map((l) => String(l.name || '')) : []; - const hasCodex = labelNames.some((name) => name === 'agent:codex'); - if (!hasCodex) { - core.setFailed(`Issue #${issue} no longer carries the agent:codex label.`); + const agentKey = String('${{ steps.ctx.outputs.agent_key }}' || 'codex').trim().toLowerCase() || 'codex'; + const labelNames = Array.isArray(data.labels) + ? data.labels.map((l) => String(l.name || '').trim().toLowerCase()) + : []; + const requiredAgentLabel = `agent:${agentKey}`; + const hasAgent = labelNames.includes(requiredAgentLabel); + if (!hasAgent) { + core.setFailed(`Issue #${issue} no longer carries the ${requiredAgentLabel} label.`); return; } const hasReady = labelNames.some((name) => name === 'status:ready'); @@ -668,7 +678,6 @@ jobs: status="${{ steps.freshness.outputs.status }}" branch="${BRANCH}" - issue="${ISSUE}" if [ -z "$status" ]; then echo '::error::Freshness check did not report a status.' @@ -699,7 +708,7 @@ jobs: short=$(git rev-parse --short "origin/$branch") timestamp=$(date -u +%Y%m%d%H%M%S) - step_branch="codex/issue-${issue}/step/${timestamp}-${short}" + step_branch="${branch}/step/${timestamp}-${short}" git checkout --detach "origin/$branch" git checkout -b "$step_branch" @@ -1140,7 +1149,8 @@ jobs: }); const prNumber = Number('${{ steps.pr.outputs.number }}'); const { owner, repo } = context.repo; - const labels = ['agent:codex', 'autofix', 'from:codex']; + const agentKey = String('${{ steps.ctx.outputs.agent_key }}' || 'codex').trim().toLowerCase() || 'codex'; + const labels = [`agent:${agentKey}`, 'autofix', `from:${agentKey}`]; try { await withRetry((client) => client.rest.issues.addLabels({ owner, repo, issue_number: prNumber, labels }) @@ -1164,7 +1174,10 @@ jobs: const prNumber = Number('${{ steps.pr.outputs.number }}'); const issue = Number('${{ steps.ctx.outputs.issue }}'); const { owner, repo } = context.repo; - const assignees = ['chatgpt-codex-connector', 
'stranske-automation-bot']; + const agentKey = String('${{ steps.ctx.outputs.agent_key }}' || 'codex').trim().toLowerCase() || 'codex'; + const assignees = agentKey === 'codex' + ? ['chatgpt-codex-connector', 'stranske-automation-bot'] + : ['stranske-automation-bot']; for (const target of [prNumber, issue]) { try { await withRetry((client) => diff --git a/.github/workflows/agents-73-codex-belt-conveyor.yml b/.github/workflows/agents-73-codex-belt-conveyor.yml index 59ab8f18..f1557205 100644 --- a/.github/workflows/agents-73-codex-belt-conveyor.yml +++ b/.github/workflows/agents-73-codex-belt-conveyor.yml @@ -5,6 +5,11 @@ name: Agents 73 Codex Belt Conveyor on: workflow_call: inputs: + agent_key: + description: 'Agent key for this belt run (default: codex)' + required: false + default: 'codex' + type: string issue: description: 'Source issue number for the Codex belt PR' required: true @@ -42,7 +47,15 @@ permissions: actions: write concurrency: - group: codex-belt-conveyor-${{ inputs.branch || format('issue-{0}', inputs.issue) || github.run_id }} + group: >- + ${{ + (inputs.agent_key || 'codex') == 'codex' + && format('codex-belt-conveyor-{0}', inputs.branch || format('issue-{0}', inputs.issue) || github.run_id) + || format('belt-{0}-conveyor-{1}', + inputs.agent_key || 'codex', + inputs.branch || format('issue-{0}', inputs.issue) || github.run_id + ) + }} cancel-in-progress: true jobs: @@ -155,7 +168,9 @@ jobs: uses: actions/checkout@v6 with: sparse-checkout: | + .github/agents/registry.yml .github/actions/setup-api-client + .github/scripts/agent_registry.js .github/scripts/github-api-with-retry.js sparse-checkout-cone-mode: false @@ -179,12 +194,17 @@ jobs: const issueValue = Number(issueRaw); const issueNumber = Number.isFinite(issueValue) && issueValue > 0 ? 
issueValue : null; const branch = '${{ inputs.branch }}'.trim(); + const agentKey = String('${{ inputs.agent_key }}' || 'codex').trim().toLowerCase() || 'codex'; const prNumber = Number('${{ inputs.pr_number }}'); const targetPr = Number.isFinite(prNumber) ? `#${prNumber}` : 'Unknown'; const modeDisplay = dryRun ? 'Preview (dry run)' : 'Live (merge ready)'; const runId = context.runId; const concurrencyKey = branch || issueNumber || runId; - const concurrencyGroup = concurrencyKey ? `codex-belt-conveyor-${concurrencyKey}` : ''; + const concurrencyGroup = concurrencyKey + ? (agentKey === 'codex' + ? `codex-belt-conveyor-${concurrencyKey}` + : `belt-${agentKey}-conveyor-${concurrencyKey}`) + : ''; if (!concurrencyGroup) { core.setFailed('Unable to determine conveyor concurrency group.'); return; @@ -192,7 +212,7 @@ jobs: const issueDisplay = issueNumber ? `#${issueNumber}` : 'Unknown'; const branchDisplay = branch || '(unspecified)'; summary - .addHeading('Codex Belt Conveyor') + .addHeading(`Belt Conveyor (agent: ${agentKey})`) .addTable([ [ { data: 'Issue', header: true }, @@ -261,9 +281,21 @@ jobs: core.setFailed(`PR #${prNumber} is running on ${headBranch} instead of ${branch}.`); return; } - const match = headBranch.match(/^codex\/issue-(\d+)$/); + const agentKey = String('${{ inputs.agent_key }}' || 'codex').trim().toLowerCase() || 'codex'; + let branchPrefix = 'codex/issue-'; + try { + const { getAgentConfig } = require('./.github/scripts/agent_registry.js'); + const cfg = getAgentConfig(agentKey); + branchPrefix = String(cfg.branch_prefix || branchPrefix); + } catch (error) { + core.warning(`Could not load agent registry; defaulting branch prefix: ${error.message}`); + } + + const escapeRegex = (value) => String(value).replace(/[.*+?^${}()|[\[\]\\]/g, '\\$&'); + const pattern = `^${escapeRegex(branchPrefix)}(\\d+)$`; + const match = headBranch.match(new RegExp(pattern)); if (!match) { - core.setFailed(`Branch ${headBranch} is not a codex belt branch.`); + 
core.setFailed(`Branch ${headBranch} is not a belt branch for agent ${agentKey}.`); return; } const inferredIssue = Number(match[1]); @@ -463,7 +495,13 @@ jobs: } } try { - await withRetry(() => github.rest.issues.createComment({ owner, repo, issue_number: issue, body: 'Merged via Codex Belt Conveyor after Gate success.' })); + const agentKey = String('${{ inputs.agent_key }}' || 'codex').trim().toLowerCase() || 'codex'; + await withRetry(() => github.rest.issues.createComment({ + owner, + repo, + issue_number: issue, + body: `Merged via belt conveyor (agent: ${agentKey}) after Gate success.`, + })); } catch (error) { core.warning(`Failed to comment on issue #${issue}: ${error.message}`); } @@ -517,12 +555,16 @@ jobs: }; const { withRetry } = retryHelpers; const { owner, repo } = context.repo; + const agentKey = String('${{ inputs.agent_key }}' || 'codex').trim().toLowerCase() || 'codex'; try { await withRetry(() => github.rest.actions.createWorkflowDispatch({ owner, repo, workflow_id: 'agents-71-codex-belt-dispatcher.yml', - ref: 'refs/heads/' + (process.env.GITHUB_REF_NAME || context.ref.replace('refs/heads/', '')) + ref: 'refs/heads/' + (process.env.GITHUB_REF_NAME || context.ref.replace('refs/heads/', '')), + inputs: { + agent_key: agentKey, + }, })); } catch (error) { core.warning(`Failed to re-dispatch dispatcher: ${error.message}`); diff --git a/.github/workflows/agents-auto-pilot.yml b/.github/workflows/agents-auto-pilot.yml index 3516e6b6..3601c1a8 100644 --- a/.github/workflows/agents-auto-pilot.yml +++ b/.github/workflows/agents-auto-pilot.yml @@ -1537,13 +1537,26 @@ jobs: const issueNumber = parseInt(process.env.ISSUE_NUMBER); const stepCount = parseInt(process.env.STEP_COUNT || '0') + 1; + let agentKey = 'codex'; + try { + const { loadAgentRegistry } = require('./.github/scripts/agent_registry.js'); + const registry = loadAgentRegistry(); + agentKey = String(registry.default_agent || agentKey) + .trim() + .toLowerCase() || agentKey; + } catch (error) { 
+ core.warning( + `Failed to load agent registry; defaulting to ${agentKey}: ${error.message}` + ); + } + await withRetry((client) => client.rest.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, issue_number: issueNumber, body: `šŸ¤– **Auto-pilot step ${stepCount}**: Issue prepared! Assigning to agent... - Adding \`agent:codex\` label. The capability check will run automatically. + Adding \`agent:${agentKey}\` label. The capability check will run automatically. ā³ Agent will create a PR shortly.` })); @@ -1553,7 +1566,7 @@ jobs: owner: context.repo.owner, repo: context.repo.repo, issue_number: issueNumber, - labels: ['agent:codex'] + labels: [`agent:${agentKey}`] })); let baseBranch = context.payload?.repository?.default_branch || ''; @@ -1581,13 +1594,18 @@ jobs: workflow_id: 'agents-71-codex-belt-dispatcher.yml', ref: baseBranch, inputs: { + agent_key: agentKey, force_issue: issueNumber.toString(), dry_run: 'false' } })); - core.info(`Dispatched codex belt dispatcher for issue #${issueNumber}`); + core.info( + `Dispatched belt dispatcher (agent: ${agentKey}) for issue #${issueNumber}` + ); } catch (dispatchError) { - core.warning(`Could not dispatch codex belt dispatcher: ${dispatchError?.message}`); + core.warning( + `Could not dispatch belt dispatcher: ${dispatchError?.message}` + ); } - name: Metrics - End capability check timer @@ -1784,7 +1802,26 @@ jobs: const issueNumber = parseInt(process.env.ISSUE_NUMBER); const issueTitle = process.env.ISSUE_TITLE || `Issue #${issueNumber}`; const stepCount = parseInt(process.env.STEP_COUNT || '0') + 1; - const branchName = `codex/issue-${issueNumber}`; + let agentKey = 'codex'; + let branchPrefix = 'codex/issue-'; + try { + const { + loadAgentRegistry, + getAgentConfig, + } = require('./.github/scripts/agent_registry.js'); + const registry = loadAgentRegistry(); + agentKey = String(registry.default_agent || agentKey) + .trim() + .toLowerCase() || agentKey; + const cfg = 
getAgentConfig(agentKey); + branchPrefix = String(cfg.branch_prefix || branchPrefix); + } catch (error) { + core.warning( + `Failed to load agent registry; defaulting branch prefix: ${error.message}` + ); + } + + const branchName = `${branchPrefix}${issueNumber}`; const maxStallRetries = parseInt(process.env.MAX_STALL_RETRIES || '5'); // Helper: count consecutive "waiting" comments (stall detection) @@ -2067,6 +2104,7 @@ jobs: workflow_id: 'agents-72-codex-belt-worker-dispatch.yml', ref: baseBranch, inputs: { + agent_key: agentKey, issue: issueNumber.toString(), branch: branchName, base: baseBranch, diff --git a/.github/workflows/agents-autofix-loop.yml b/.github/workflows/agents-autofix-loop.yml index 902cdf83..630fdfa4 100644 --- a/.github/workflows/agents-autofix-loop.yml +++ b/.github/workflows/agents-autofix-loop.yml @@ -38,6 +38,7 @@ jobs: head_sha: ${{ steps.evaluate.outputs.head_sha }} appendix: ${{ steps.evaluate.outputs.appendix }} stop_reason: ${{ steps.evaluate.outputs.stop_reason }} + agent_type: ${{ steps.evaluate.outputs.agent_type }} attempts: ${{ steps.evaluate.outputs.attempts }} max_attempts: ${{ steps.evaluate.outputs.max_attempts }} trigger_reason: ${{ steps.evaluate.outputs.trigger_reason }} @@ -52,6 +53,8 @@ jobs: uses: actions/checkout@v6 with: sparse-checkout: | + .github/agents/registry.yml + .github/scripts/agent_registry.js .github/scripts/prompt_injection_guard.js .github/actions/setup-api-client .github/scripts/github-api-with-retry.js @@ -154,6 +157,7 @@ jobs: head_sha: '', appendix: '', stop_reason: '', + agent_type: '', attempts: '0', max_attempts: '2', trigger_reason: 'unknown', @@ -220,13 +224,24 @@ jobs: .map((label) => (label?.name || '').toLowerCase()) .filter(Boolean) : []; - const hasAgentLabel = labels.includes('agent:codex'); + + const hasExplicitAgentLabel = labels.some((label) => label.startsWith('agent:')); + let agentType = ''; + try { + const { resolveAgentFromLabels } = require('./.github/scripts/agent_registry.js'); 
+ agentType = resolveAgentFromLabels(prData.labels); + } catch (error) { + const agentLabel = labels.find((label) => label.startsWith('agent:')); + agentType = agentLabel ? agentLabel.slice('agent:'.length) : 'codex'; + } + + outputs.agent_type = String(agentType || ''); const body = prData.body || ''; const configMatch = body.match(/autofix\s*:\s*(true|false)/i); let autofixEnabled = configMatch ? configMatch[1].toLowerCase() === 'true' - : hasAgentLabel; + : hasExplicitAgentLabel; // Auto-escalation: Escalate to Codex CLI when Gate fails // Triggers if: (1) basic autofix ran but insufficient, OR (2) no basic autofix applied @@ -265,6 +280,14 @@ jobs: return stop('autofix disabled for this pull request'); } + // Phase 2: We only support Codex CLI autofix for now. + if ((outputs.agent_type || '') && outputs.agent_type !== 'codex') { + return stop( + `unsupported agent type for autofix loop: ${outputs.agent_type}`, + 'unsupported_agent' + ); + } + const jobs = await paginateWithRetry( github, github.rest.actions.listJobsForWorkflowRun, @@ -395,7 +418,7 @@ jobs: autofix: needs: prepare - if: needs.prepare.outputs.should_run == 'true' + if: needs.prepare.outputs.should_run == 'true' && needs.prepare.outputs.agent_type == 'codex' name: Run Codex autofix uses: stranske/Workflows/.github/workflows/reusable-codex-run.yml@main with: diff --git a/.github/workflows/agents-verify-to-issue-v2.yml b/.github/workflows/agents-verify-to-issue-v2.yml index eec7149a..9a99d60d 100644 --- a/.github/workflows/agents-verify-to-issue-v2.yml +++ b/.github/workflows/agents-verify-to-issue-v2.yml @@ -74,6 +74,8 @@ jobs: token: ${{ steps.select-token.outputs.token }} sparse-checkout: | .github/actions/setup-api-client + .github/agents/registry.yml + .github/scripts/agent_registry.js .github/scripts/github-api-with-retry.js .github/scripts/token_load_balancer.js scripts/langchain @@ -332,6 +334,23 @@ jobs: }; const { withRetry } = retryHelpers; + let agentKey = 'codex'; + try { + const { 
resolveAgentFromLabels } = require('./.github/scripts/agent_registry.js'); + const prLabels = context.payload.pull_request?.labels || []; + agentKey = resolveAgentFromLabels(prLabels, { + registryPath: './.github/agents/registry.yml', + }); + } catch (err) { + core.warning( + `Failed to resolve agent from PR labels; defaulting to codex: ${err.message}` + ); + agentKey = 'codex'; + } + const normalized = String(agentKey || 'codex').trim().toLowerCase() || 'codex'; + const agentLabel = `agent:${normalized}`; + const fromLabel = `from:${normalized}`; + const title = process.env.ISSUE_TITLE; const body = process.env.ISSUE_BODY; @@ -345,7 +364,7 @@ jobs: repo: context.repo.repo, title: title, body: body, - labels: ['follow-up', 'agents:optimize'] + labels: ['follow-up', agentLabel, fromLabel, 'agents:optimize'] })); core.info(`Created issue #${issue.data.number}`); @@ -383,7 +402,7 @@ jobs: '**Next steps:**', '1. Review the generated issue', '2. Add `agents:apply-suggestions` label to format for agent work', - '3. Add `agent:codex` label to assign to an agent', + '3. Add an `agent:*` label (e.g., `agent:codex`) to assign to an agent', '', '> Or work on it manually - the choice is yours!' ].join('\n'); diff --git a/.github/workflows/agents-verify-to-issue.yml b/.github/workflows/agents-verify-to-issue.yml index 1453d018..07a9794f 100644 --- a/.github/workflows/agents-verify-to-issue.yml +++ b/.github/workflows/agents-verify-to-issue.yml @@ -183,7 +183,7 @@ jobs: '', '1. Add `agents:optimize` label to get AI-suggested improvements', '2. Add `agents:apply-suggestions` to format for agent work', - '3. Add `agent:codex` to assign to an agent', + '3. 
Add an `agent:*` label (e.g., `agent:codex`) to assign to an agent', '', 'Or work on it manually - the choice is yours!', '', diff --git a/.github/workflows/agents-verify-to-new-pr.yml b/.github/workflows/agents-verify-to-new-pr.yml index e8866d2c..76cdf6a0 100644 --- a/.github/workflows/agents-verify-to-new-pr.yml +++ b/.github/workflows/agents-verify-to-new-pr.yml @@ -76,6 +76,8 @@ jobs: token: ${{ steps.select-token.outputs.token }} sparse-checkout: | .github/actions/setup-api-client + .github/agents/registry.yml + .github/scripts/agent_registry.js .github/scripts/github-api-with-retry.js .github/scripts/token_load_balancer.js scripts/langchain @@ -736,12 +738,30 @@ jobs: return; } + let agentKey = 'codex'; + try { + const { resolveAgentFromLabels } = require('./.github/scripts/agent_registry.js'); + const labels = + (context.payload.pull_request && context.payload.pull_request.labels) || []; + agentKey = resolveAgentFromLabels(labels, { + registryPath: './.github/agents/registry.yml', + }); + } catch (err) { + core.warning( + `Failed to resolve agent from PR labels; defaulting to codex: ${err.message}` + ); + agentKey = 'codex'; + } + const normalized = String(agentKey || 'codex').trim().toLowerCase() || 'codex'; + const agentLabel = `agent:${normalized}`; + const fromLabel = `from:${normalized}`; + const issue = await withRetry((client) => (client || github).rest.issues.create({ owner: context.repo.owner, repo: context.repo.repo, title: title, body: body, - labels: ['follow-up', 'agent:codex', 'agents:auto-pilot'] + labels: ['follow-up', agentLabel, fromLabel, 'agents:auto-pilot'] })); core.info(`Created issue #${issue.data.number}`); diff --git a/.github/workflows/autofix.yml b/.github/workflows/autofix.yml index 42fe2842..2fc84d8a 100644 --- a/.github/workflows/autofix.yml +++ b/.github/workflows/autofix.yml @@ -3,6 +3,7 @@ # # Triggers: # - Gate workflow completes with failure (lint/format issues detected) +# - Lint job fails early (workflow_job completed) 
# - PR labeled with 'autofix' or 'autofix:clean' (manual trigger) # # Copy this file to: .github/workflows/autofix.yml @@ -15,6 +16,8 @@ on: workflow_run: workflows: ["Gate", "CI", "Python CI"] types: [completed] + workflow_job: + types: [completed] pull_request_target: types: - labeled @@ -30,7 +33,16 @@ concurrency: group: >- autofix-${{ github.event.pull_request.number - || github.event.workflow_run.pull_requests[0].number + || ( + github.event.workflow_run.pull_requests && + github.event.workflow_run.pull_requests[0] && + github.event.workflow_run.pull_requests[0].number + ) + || ( + github.event.workflow_job.pull_requests && + github.event.workflow_job.pull_requests[0] && + github.event.workflow_job.pull_requests[0].number + ) || github.run_id }} cancel-in-progress: true @@ -149,6 +161,43 @@ jobs: return true; }; + const listFilesOrNullOnRateLimit = async ({ owner, repo, prNumber }) => { + try { + return await paginateWithRetry( + github.rest.pulls.listFiles, + { + owner, + repo, + pull_number: prNumber, + per_page: 100, + }, + { maxRetries: 3 } + ); + } catch (error) { + const message = String(error?.message || error || ''); + const status = Number(error?.status || error?.response?.status || 0); + if (status === 403 && message.toLowerCase().includes('rate limit')) { + core.warning( + 'Rate limited listing PR files; proceeding without file filter.' + ); + return null; + } + throw error; + } + }; + + const setOutputs = ({ pr, sameRepo, callerActor }) => { + const labels = (pr.labels || []).map((l) => l.name); + core.setOutput('should_run', 'true'); + core.setOutput('pr_number', pr.number); + core.setOutput('head_ref', pr.head.ref); + core.setOutput('pr_title', pr.title); + core.setOutput('pr_is_draft', pr.draft ? 'true' : 'false'); + core.setOutput('pr_labels_json', JSON.stringify(labels)); + core.setOutput('same_repo', sameRepo ? 
'true' : 'false'); + core.setOutput('caller_actor', callerActor); + }; + // --- workflow_run trigger (after Gate/CI completes) --- if (context.eventName === 'workflow_run') { const run = context.payload.workflow_run; @@ -261,28 +310,11 @@ jobs: } // Only run autofix when Python files are present. - let files = []; - try { - files = await paginateWithRetry( - github.rest.pulls.listFiles, - { - owner: context.repo.owner, - repo: context.repo.repo, - pull_number: prNumber, - per_page: 100, - }, - { maxRetries: 3 } - ); - } catch (error) { - const message = String(error?.message || error || ''); - const status = Number(error?.status || error?.response?.status || 0); - if (status === 403 && message.toLowerCase().includes('rate limit')) { - core.warning('Rate limited listing PR files; proceeding without file filter.'); - files = null; - } else { - throw error; - } - } + const files = await listFilesOrNullOnRateLimit({ + owner: context.repo.owner, + repo: context.repo.repo, + prNumber, + }); const hasPython = files === null @@ -298,29 +330,126 @@ jobs: return; } - const labels = - (pr.labels || []).map(l => l.name); + setOutputs({ + pr, + sameRepo, + callerActor: run.actor?.login || context.actor, + }); + return; + } - core.setOutput('should_run', 'true'); - core.setOutput('pr_number', pr.number); - core.setOutput('head_ref', pr.head.ref); - core.setOutput('pr_title', pr.title); - core.setOutput( - 'pr_is_draft', - pr.draft ? 'true' : 'false', - ); - core.setOutput( - 'pr_labels_json', - JSON.stringify(labels), - ); - core.setOutput( - 'same_repo', - sameRepo ? 
'true' : 'false', - ); - core.setOutput( - 'caller_actor', - run.actor?.login || context.actor, + // --- workflow_job trigger (early lint failure) --- + if (context.eventName === 'workflow_job') { + const workflowJob = context.payload.workflow_job; + if (!workflowJob) { + core.setOutput('should_run', 'false'); + return; + } + + const workflowName = String(workflowJob.workflow_name || '').trim(); + const jobName = String(workflowJob.name || '').toLowerCase(); + const conclusion = String(workflowJob.conclusion || '').toLowerCase(); + + // Only respond to failing lint jobs in the workflows we care about. + const relevantJob = + jobName.includes('lint-format') || jobName.includes('lint-ruff'); + if (!relevantJob) { + core.info( + `workflow_job '${workflowJob.name}' is not a lint-format/lint-ruff job.` + ); + core.setOutput('should_run', 'false'); + return; + } + + const allowedWorkflows = new Set(['Gate', 'CI', 'Python CI']); + if (workflowName && !allowedWorkflows.has(workflowName)) { + core.info( + `workflow_job is from workflow '${workflowName}', not Gate/CI/Python CI; ` + + 'skipping.' 
+ ); + core.setOutput('should_run', 'false'); + return; + } + + if (conclusion !== 'failure') { + core.info( + `workflow_job '${workflowJob.name}' concluded '${workflowJob.conclusion}' ` + + '— no autofix needed' + ); + core.setOutput('should_run', 'false'); + return; + } + + const prs = workflowJob.pull_requests || []; + if (!prs.length) { + core.info('workflow_job event has no associated PR; skipping.'); + core.setOutput('should_run', 'false'); + return; + } + + const prNumber = prs[0].number; + const triggerHeadSha = String(workflowJob.head_sha || ''); + const { owner, repo } = context.repo; + const { data: pr } = await withRetry((client) => + client.rest.pulls.get({ owner, repo, pull_number: prNumber }) ); + + if (pr.state !== 'open') { + core.info(`PR #${prNumber} is ${pr.state}`); + core.setOutput('should_run', 'false'); + return; + } + + if (pr.draft) { + core.info('PR is draft.'); + core.setOutput('should_run', 'false'); + return; + } + + const sameRepo = + pr.head.repo !== null && + pr.head.repo.full_name === pr.base.repo?.full_name; + if (!sameRepo) { + core.info('Fork PR — not supported.'); + core.setOutput('should_run', 'false'); + return; + } + + const headSha = pr.head?.sha; + if (!headSha) { + core.info('PR head SHA missing; skipping autofix.'); + core.setOutput('should_run', 'false'); + return; + } + + if (triggerHeadSha && triggerHeadSha !== headSha) { + core.info( + `workflow_job head_sha ${triggerHeadSha} does not match PR head ${headSha}; ` + + 'skipping stale event.' + ); + core.setOutput('should_run', 'false'); + return; + } + + const files = await listFilesOrNullOnRateLimit({ owner, repo, prNumber }); + const hasPython = + files === null + ? 
true + : files.some( + (file) => + file.filename.endsWith('.py') || file.filename.endsWith('.pyi') + ); + if (!hasPython) { + core.info('No Python files changed.'); + core.setOutput('should_run', 'false'); + return; + } + + setOutputs({ + pr, + sameRepo, + callerActor: context.payload.sender?.login || context.actor, + }); return; } @@ -384,27 +513,11 @@ jobs: (pr.labels || []).map(l => l.name); let files = []; - try { - files = await paginateWithRetry( - github.rest.pulls.listFiles, - { - owner: context.repo.owner, - repo: context.repo.repo, - pull_number: pr.number, - per_page: 100, - }, - { maxRetries: 3 } - ); - } catch (error) { - const message = String(error?.message || error || ''); - const status = Number(error?.status || error?.response?.status || 0); - if (status === 403 && message.toLowerCase().includes('rate limit')) { - core.warning('Rate limited listing PR files; proceeding without file filter.'); - files = null; - } else { - throw error; - } - } + files = await listFilesOrNullOnRateLimit({ + owner: context.repo.owner, + repo: context.repo.repo, + prNumber: pr.number, + }); const hasPython = files === null @@ -420,23 +533,11 @@ jobs: return; } - core.setOutput('should_run', 'true'); - core.setOutput('pr_number', pr.number); - core.setOutput('head_ref', pr.head.ref); - core.setOutput('pr_title', pr.title); - core.setOutput( - 'pr_is_draft', - pr.draft ? 'true' : 'false', - ); - core.setOutput( - 'pr_labels_json', - JSON.stringify(labels), - ); - core.setOutput( - 'same_repo', - sameRepo ? 
'true' : 'false', - ); - core.setOutput('caller_actor', context.actor); + setOutputs({ + pr, + sameRepo, + callerActor: context.actor, + }); # Call reusable autofix workflow autofix: diff --git a/CLAUDE.md b/CLAUDE.md index ba489457..685aac40 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -49,53 +49,6 @@ When an issue is labeled `agent:codex`: ## Common Issues -## CI Test Policy (PR Gate vs CI vs Release E2E) - -This repo intentionally does **not** run the full test surface area on every PR Gate run. - -### PR Gate (`.github/workflows/pr-00-gate.yml`) - -Goal: fast feedback for most PRs. - -- Runs pytest **in parallel** (xdist): `-n auto --dist loadscope` -- Runs pytest **without coverage** (`coverage: false`) -- Skips integration directories: - - `tests/integration/` - - `tests/integrations/` -- Skips **release/packaging** tests via marker: `pytest_markers: "not release"` - -These suites will NOT run on PR Gate unless you run them manually (see below). - -### Main-branch CI (`.github/workflows/ci.yml`) - -Goal: enforce full quality gates on `main`. - -- Runs pytest **with coverage** (`coverage: true`, `coverage-min` enforced) -- Runs pytest **in parallel** (xdist): `-n auto --dist loadscope` -- Runs the full test suite (including integration dirs and `release` tests) - -### Release/Packaging E2E (`.github/workflows/release-e2e.yml`) - -Goal: keep slow PyInstaller + packaged-executable checks out of PR Gate. 
- -- Runs nightly on `main` -- Runs on PRs when the PR is labeled: `run-release` -- Executes only the tests marked `release`: `pytest -m release` - -### How to run skipped suites locally - -```bash -# Fast PR-gate-like run (parallel, no coverage, skip release + integration dirs) -pytest -q -n auto --dist loadscope -m "not release" \ - --ignore=tests/integration --ignore=tests/integrations - -# Release / packaging validation (PyInstaller + packaged executable) -pytest -q -m release - -# Integration directories (if you need them on a PR) -pytest -q tests/integration tests/integrations -``` - ### Workflow fails with "workflow file issue" - A reusable workflow is being called that doesn't exist - Check Workflows repo has the required `reusable-*.yml` file diff --git a/scripts/langchain/followup_issue_generator.py b/scripts/langchain/followup_issue_generator.py index f8652168..c898b864 100755 --- a/scripts/langchain/followup_issue_generator.py +++ b/scripts/langchain/followup_issue_generator.py @@ -872,44 +872,43 @@ def _get_llm_client(reasoning: bool = False) -> tuple[Any, str] | None: return resolved.client, resolved.model -def _resolve_run_id() -> str: - return os.environ.get("GITHUB_RUN_ID") or os.environ.get("RUN_ID") or "unknown" - - -def _resolve_repo() -> str: - return os.environ.get("GITHUB_REPOSITORY") or "unknown" - - -def _resolve_issue_or_pr_number(*, pr_number: int | None, issue_number: int | None) -> str: - if pr_number is not None: - return str(pr_number) - env_pr = os.environ.get("PR_NUMBER") - if env_pr and env_pr.isdigit(): - return env_pr - if issue_number is not None: - return str(issue_number) - env_issue = os.environ.get("ISSUE_NUMBER") - if env_issue and env_issue.isdigit(): - return env_issue - return "unknown" - - def _build_llm_config( *, operation: str, pr_number: int | None, issue_number: int | None, ) -> dict[str, object]: - repo = _resolve_repo() - run_id = _resolve_run_id() - issue_or_pr = _resolve_issue_or_pr_number(pr_number=pr_number, 
issue_number=issue_number) + try: + from tools.llm_provider import build_langsmith_metadata + + return build_langsmith_metadata( + operation=operation, + pr_number=pr_number, + issue_number=issue_number, + ) + except ImportError: + pass + + # Inline fallback when tools.llm_provider is unavailable + repo = os.environ.get("GITHUB_REPOSITORY", "unknown") + run_id = os.environ.get("GITHUB_RUN_ID") or os.environ.get("RUN_ID") or "unknown" + if pr_number is not None: + issue_or_pr = str(pr_number) + elif issue_number is not None: + issue_or_pr = str(issue_number) + else: + env_pr = os.environ.get("PR_NUMBER", "") + env_issue = os.environ.get("ISSUE_NUMBER", "") + issue_or_pr = ( + env_pr if env_pr.isdigit() else env_issue if env_issue.isdigit() else "unknown" + ) metadata = { "repo": repo, "run_id": run_id, "issue_or_pr_number": issue_or_pr, "operation": operation, "pr_number": str(pr_number) if pr_number is not None else None, - "issue_number": str(issue_number) if issue_number is not None else None, + "issue_number": (str(issue_number) if issue_number is not None else None), } tags = [ "workflows-agents", @@ -985,8 +984,12 @@ def _invoke_llm( operation: str, pr_number: int | None, issue_number: int | None, -) -> str: - """Invoke LLM and return response text.""" +) -> tuple[str, str | None, str | None]: + """Invoke LLM and return response text with trace information. + + Returns: + Tuple of (response_text, trace_id, trace_url) + """ try: import langchain_core.messages as lc_messages except ModuleNotFoundError: @@ -1039,24 +1042,38 @@ def normalize_response_content(response: Any) -> str: exc, ) response = client.invoke(messages) - return normalize_response_content(response) - - # langchain_core isn't available. Prefer non-message invoke signatures first. - try: - response = client.invoke(prompt, config=config) - except TypeError as exc: - LOGGER.warning( - "LLM invoke failed with config/metadata; using config/metadata fallback. 
Error: %s", - exc, - ) + else: + # langchain_core isn't available. Prefer non-message invoke signatures first. try: - response = client.invoke(prompt) - except Exception as inner_exc: - raise RuntimeError( - "Unable to invoke client without langchain_core installed. " - "Install langchain-core or provide a client that accepts plain string prompts." - ) from inner_exc - return normalize_response_content(response) + response = client.invoke(prompt, config=config) + except TypeError as exc: + LOGGER.warning( + "LLM invoke failed with config/metadata; using config/metadata fallback. Error: %s", + exc, + ) + try: + response = client.invoke(prompt) + except Exception as inner_exc: + raise RuntimeError( + "Unable to invoke client without langchain_core installed. " + "Install langchain-core or provide a client that accepts plain string prompts." + ) from inner_exc + + # Extract trace ID from response + trace_id, trace_url = None, None + try: + from tools.llm_provider import derive_langsmith_trace_url, extract_trace_id + + trace_id = extract_trace_id(response) + if trace_id: + trace_url = derive_langsmith_trace_url(trace_id) + LOGGER.info(f"LangSmith trace: {trace_url}") + except ImportError: + LOGGER.debug("tools.llm_provider not available for trace extraction") + except Exception as exc: + LOGGER.debug(f"Failed to extract trace ID: {exc}") + + return normalize_response_content(response), trace_id, trace_url def _extract_json(text: str) -> dict[str, Any]: @@ -1259,7 +1276,7 @@ def _generate_with_llm( iteration_details=iteration_details, ) - analysis_response = _invoke_llm( + analysis_response, trace_id_1, trace_url_1 = _invoke_llm( analyze_prompt, reasoning_client, operation="analyze_verification", @@ -1276,7 +1293,7 @@ def _generate_with_llm( ), # Limit for token budget ) - tasks_response = _invoke_llm( + tasks_response, trace_id_2, trace_url_2 = _invoke_llm( tasks_prompt, standard_client, operation="generate_tasks", @@ -1291,7 +1308,7 @@ def _generate_with_llm( 
unmet_criteria=json.dumps(analysis.get("rewritten_acceptance_criteria", []), indent=2), ) - ac_response = _invoke_llm( + ac_response, trace_id_3, trace_url_3 = _invoke_llm( ac_prompt, standard_client, operation="generate_acceptance_criteria", @@ -1327,7 +1344,7 @@ def _generate_with_llm( advisory_notes=json.dumps(advisory_concerns, indent=2), ) - issue_body = _invoke_llm( + issue_body, trace_id_4, trace_url_4 = _invoke_llm( format_prompt, standard_client, operation="format_followup_issue", @@ -1337,6 +1354,20 @@ issue_body = _strip_markdown_fence(issue_body) issue_body = _append_advisory_notes(issue_body, advisory_concerns) + # Append LangSmith trace URLs for observability (as HTML comments) + trace_info = [ + ("analyze_verification", trace_url_1), + ("generate_tasks", trace_url_2), + ("generate_acceptance_criteria", trace_url_3), + ("format_followup_issue", trace_url_4), + ] + trace_comments = [] + for operation, trace_url in trace_info: + if trace_url: + trace_comments.append(f"<!-- langsmith-trace {operation}: {trace_url} -->") + if trace_comments: + issue_body = issue_body + "\n\n" + "\n".join(trace_comments) + # Generate title from concrete tasks concrete_tasks = analysis.get("concrete_tasks", []) if concrete_tasks: diff --git a/scripts/langchain/pr_verifier.py b/scripts/langchain/pr_verifier.py index 9e5d486d..19176118 100755 --- a/scripts/langchain/pr_verifier.py +++ b/scripts/langchain/pr_verifier.py @@ -220,6 +220,8 @@ class EvaluationResult(BaseModel): raw_content: str | None = None error: str | None = None change_type: Literal["infrastructure", "application", "mixed"] | None = None + langsmith_trace_id: str | None = None + langsmith_trace_url: str | None = None class EvaluationPayload(BaseModel): @@ -312,7 +314,7 @@ def from_environment( def run_single(self, client: object, provider: str, model: str) -> EvaluationResult: try: - response = _invoke_llm( + response, trace_id, trace_url = _invoke_llm( client, self.prompt, operation="evaluate_pr_compare", @@ -326,6 +328,8 @@ def 
run_single(self, client: object, provider: str, model: str) -> EvaluationRes content = getattr(response, "content", None) or str(response) result = _parse_llm_response(content, provider, client=client) result.model = model + result.langsmith_trace_id = trace_id + result.langsmith_trace_url = trace_url return result @@ -438,30 +442,6 @@ def _extract_pr_metadata(context: str) -> tuple[int | None, str | None]: return None, None -def _resolve_run_id() -> str: - return os.environ.get("GITHUB_RUN_ID") or os.environ.get("RUN_ID") or "unknown" - - -def _resolve_repo() -> str: - return os.environ.get("GITHUB_REPOSITORY") or "unknown" - - -def _resolve_issue_or_pr_number( - *, pr_number: int | None = None, issue_number: int | None = None -) -> str: - if pr_number is not None: - return str(pr_number) - env_pr = os.environ.get("PR_NUMBER") - if env_pr and env_pr.isdigit(): - return env_pr - if issue_number is not None: - return str(issue_number) - env_issue = os.environ.get("ISSUE_NUMBER") - if env_issue and env_issue.isdigit(): - return env_issue - return "unknown" - - def _build_llm_config( *, operation: str, @@ -471,16 +451,38 @@ def _build_llm_config( ) -> dict[str, object]: if pr_number is None and context: pr_number, _ = _extract_pr_metadata(context) - repo = _resolve_repo() - run_id = _resolve_run_id() - issue_or_pr = _resolve_issue_or_pr_number(pr_number=pr_number, issue_number=issue_number) + + try: + from tools.llm_provider import build_langsmith_metadata + + return build_langsmith_metadata( + operation=operation, + pr_number=pr_number, + issue_number=issue_number, + ) + except ImportError: + pass + + # Inline fallback when tools.llm_provider is unavailable + repo = os.environ.get("GITHUB_REPOSITORY", "unknown") + run_id = os.environ.get("GITHUB_RUN_ID") or os.environ.get("RUN_ID") or "unknown" + if pr_number is not None: + issue_or_pr = str(pr_number) + elif issue_number is not None: + issue_or_pr = str(issue_number) + else: + env_pr = os.environ.get("PR_NUMBER", 
"") + env_issue = os.environ.get("ISSUE_NUMBER", "") + issue_or_pr = ( + env_pr if env_pr.isdigit() else env_issue if env_issue.isdigit() else "unknown" + ) metadata = { "repo": repo, "run_id": run_id, "issue_or_pr_number": issue_or_pr, "operation": operation, "pr_number": str(pr_number) if pr_number is not None else None, - "issue_number": str(issue_number) if issue_number is not None else None, + "issue_number": (str(issue_number) if issue_number is not None else None), } tags = [ "workflows-agents", @@ -500,7 +502,12 @@ def _invoke_llm( context: str | None = None, pr_number: int | None = None, issue_number: int | None = None, -) -> object: +) -> tuple[object, str | None, str | None]: + """Invoke LLM and extract trace information. + + Returns: + Tuple of (response, trace_id, trace_url) + """ config = _build_llm_config( operation=operation, context=context, @@ -508,13 +515,30 @@ def _invoke_llm( issue_number=issue_number, ) try: - return client.invoke(prompt, config=config) + response = client.invoke(prompt, config=config) except TypeError as exc: LOGGER.warning( "LLM invoke failed with config/metadata; using config/metadata fallback. 
Error: %s", exc, ) - return client.invoke(prompt) + response = client.invoke(prompt) + + # Extract trace ID from response if available + trace_id = None + trace_url = None + try: + from tools.llm_provider import derive_langsmith_trace_url, extract_trace_id + + trace_id = extract_trace_id(response) + if trace_id: + trace_url = derive_langsmith_trace_url(trace_id) + LOGGER.info(f"LangSmith trace: {trace_url}") + except ImportError: + LOGGER.debug("tools.llm_provider not available for trace extraction") + except Exception as exc: + LOGGER.debug(f"Failed to extract trace ID: {exc}") + + return response, trace_id, trace_url def _format_scores(scores: EvaluationScores | None) -> list[str]: @@ -699,8 +723,9 @@ def evaluate_pr( prompt = _prepare_prompt(context, diff) change_type = _classify_change_type(diff) pr_number, _ = _extract_pr_metadata(context) + trace_id, trace_url = None, None try: - response = _invoke_llm( + response, trace_id, trace_url = _invoke_llm( client, prompt, operation="evaluate_pr", @@ -715,7 +740,7 @@ def evaluate_pr( if fallback_resolved is not None: fallback_client, fallback_provider_name = fallback_resolved try: - response = _invoke_llm( + response, trace_id, trace_url = _invoke_llm( fallback_client, prompt, operation="evaluate_pr_fallback", @@ -739,9 +764,13 @@ def evaluate_pr( error=f"Primary provider ({provider_name}) failed, used fallback", raw_content=result.raw_content, change_type=change_type, + langsmith_trace_id=trace_id, + langsmith_trace_url=trace_url, ) else: result.change_type = change_type + result.langsmith_trace_id = trace_id + result.langsmith_trace_url = trace_url return result except Exception as fallback_exc: result = _fallback_evaluation( @@ -757,6 +786,8 @@ def evaluate_pr( content = getattr(response, "content", None) or str(response) result = _parse_llm_response(content, provider_name, client=client) result.change_type = change_type + result.langsmith_trace_id = trace_id + result.langsmith_trace_url = trace_url return result 
@@ -958,6 +989,18 @@ def format_comparison_report(results: list[EvaluationResult]) -> str: lines.append(f"- {labels[index]}: {'; '.join(insights)}") lines.append("") + # Add LangSmith trace links if available + trace_urls = [ + (labels[i], result.langsmith_trace_url) + for i, result in enumerate(results) + if result.langsmith_trace_url + ] + if trace_urls: + lines.append("### šŸ” LangSmith Traces") + for label, url in trace_urls: + lines.append(f"- [{label}]({url})") + lines.append("") + return "\n".join(lines).strip() + "\n" diff --git a/scripts/sync_dev_dependencies.py b/scripts/sync_dev_dependencies.py index b38259c6..90a5dd48 100755 --- a/scripts/sync_dev_dependencies.py +++ b/scripts/sync_dev_dependencies.py @@ -57,10 +57,6 @@ ) -def _is_black_drift(change: str) -> bool: - return change.strip().lower().startswith("black:") - - def parse_env_file(path: Path) -> dict[str, str]: """Parse the autofix-versions.env file into a dict of key=value pairs.""" if not path.exists(): @@ -437,12 +433,6 @@ def main(argv: list[str] | None = None) -> int: return 2 if changes: - if args.check and any(_is_black_drift(change) for change in changes): - print( - "Error: Black formatting pin drift detected (version mismatch/out of sync).", - file=sys.stderr, - ) - print(f"{'Applied' if args.apply else 'Found'} {len(changes)} version updates:") for change in changes: print(f" - {change}") @@ -450,9 +440,9 @@ def main(argv: list[str] | None = None) -> int: if args.check: print("\nRun with --apply to update dependency files") return 1 - - print("\nāœ“ Dependency files updated") - return 0 + else: + print("\nāœ“ Dependency files updated") + return 0 else: print("āœ“ All dev dependency versions are in sync") return 0 diff --git a/tests/test_historical_update.py b/tests/test_historical_update.py index fc3bac58..a1bccb73 100644 --- a/tests/test_historical_update.py +++ b/tests/test_historical_update.py @@ -640,9 +640,7 @@ def 
test_append_wal_row_preserves_existing_formulas_and_formatting(tmp_path: Pat sheet.cell(row=3, column=2).value = "=2.10" sheet.cell(row=3, column=2).number_format = "0.00" sheet.cell(row=3, column=2).font = styles.Font(bold=True) - sheet.cell(row=3, column=2).fill = styles.PatternFill( - patternType="solid", fgColor="FFFF00" - ) + sheet.cell(row=3, column=2).fill = styles.PatternFill(patternType="solid", fgColor="FFFF00") sheet.cell(row=3, column=2).border = styles.Border( left=styles.Side(style="thin"), right=styles.Side(style="thin"), diff --git a/tools/llm_provider.py b/tools/llm_provider.py index c046a7fe..90682b63 100644 --- a/tools/llm_provider.py +++ b/tools/llm_provider.py @@ -68,6 +68,127 @@ def _setup_langsmith_tracing() -> bool: # This flag can be used to conditionally enable LangSmith-specific features. LANGSMITH_ENABLED = _setup_langsmith_tracing() +LANGSMITH_TRACE_URL_BASE = "https://smith.langchain.com/r/" + + +def build_langsmith_metadata( + *, + operation: str, + repo: str | None = None, + run_id: str | None = None, + issue_or_pr_number: str | None = None, + pr_number: int | None = None, + issue_number: int | None = None, +) -> dict[str, object]: + """Build a standardized LangSmith metadata and tags config dict. + + Returns a dict with ``metadata`` and ``tags`` keys suitable for passing + as ``config=`` to a LangChain ``client.invoke()`` call. When + ``LANGSMITH_API_KEY`` is set the metadata also includes a + ``langsmith_project`` field so traces are grouped correctly. + + The returned dict always has the same shape regardless of whether + LangSmith is enabled. 
+ """ + repo = repo or os.environ.get("GITHUB_REPOSITORY", "unknown") + run_id = run_id or os.environ.get("GITHUB_RUN_ID", "unknown") + + if issue_or_pr_number is None: + if pr_number is not None: + issue_or_pr_number = str(pr_number) + elif issue_number is not None: + issue_or_pr_number = str(issue_number) + else: + env_pr = os.environ.get("PR_NUMBER", "") + env_issue = os.environ.get("ISSUE_NUMBER", "") + issue_or_pr_number = ( + env_pr if env_pr.isdigit() else env_issue if env_issue.isdigit() else "unknown" + ) + + metadata: dict[str, object] = { + "repo": repo, + "run_id": run_id, + "issue_or_pr_number": issue_or_pr_number, + "operation": operation, + "pr_number": str(pr_number) if pr_number is not None else None, + "issue_number": str(issue_number) if issue_number is not None else None, + } + + if LANGSMITH_ENABLED: + metadata["langsmith_project"] = os.environ.get("LANGCHAIN_PROJECT", "workflows-agents") + + tags = [ + "workflows-agents", + f"operation:{operation}", + f"repo:{repo}", + f"issue_or_pr:{issue_or_pr_number}", + f"run_id:{run_id}", + ] + + return {"metadata": metadata, "tags": tags} + + +def derive_langsmith_trace_url(trace_id: str | None) -> str | None: + """Derive a clickable LangSmith trace URL from a trace ID. + + Returns ``None`` when *trace_id* is falsy. + """ + if not trace_id: + return None + return f"{LANGSMITH_TRACE_URL_BASE}{trace_id}" + + +def extract_trace_id(response) -> str | None: + """Extract LangSmith trace ID from a LangChain response object. + + Works with responses from ChatOpenAI, ChatAnthropic, and other LangChain clients. + Returns None if no trace ID is available or LangSmith tracing is disabled. 
+ + Args: + response: LangChain response object (e.g., AIMessage from client.invoke()) + + Returns: + Trace ID string or None + """ + if not LANGSMITH_ENABLED: + return None + + # LangChain response objects have a response_metadata dict with run_id + # The run_id is the trace ID in LangSmith + try: + # Try to get run_id from response metadata (primary method) + if hasattr(response, "response_metadata"): + metadata = response.response_metadata + if isinstance(metadata, dict) and "run_id" in metadata: + return str(metadata["run_id"]) + + # Fallback: Some LangChain providers may use id attribute directly + # WARNING: This may not always correspond to the LangSmith trace ID + if hasattr(response, "id"): + trace_id = str(response.id) + logger.debug( + "Using response.id as trace ID (fallback). " + "Verify this corresponds to LangSmith trace for your provider." + ) + return trace_id + + # Additional fallback for compatibility + if hasattr(response, "__dict__"): + response_dict = response.__dict__ + if "id" in response_dict: + trace_id = str(response_dict["id"]) + logger.debug( + "Using response.__dict__['id'] as trace ID (fallback). " + "Verify this corresponds to LangSmith trace for your provider." + ) + return trace_id + + except Exception as e: + logger.debug(f"Failed to extract trace ID from response: {e}") + return None + + return None + def _is_token_limit_error(error: Exception) -> bool: """Check if error is a token limit (413) error from GitHub Models.""" diff --git a/tools/requirements-llm.txt b/tools/requirements-llm.txt index 1cfa17ca..d97f23c8 100644 --- a/tools/requirements-llm.txt +++ b/tools/requirements-llm.txt @@ -3,10 +3,10 @@ # - These are standalone runtime pins for workflow LLM steps, not app deps. # - When updating, coordinate with requirements.lock and pyproject.toml. # - Use strict X.Y.Z pins to keep workflow installs reproducible. 
-langchain==1.2.9 -langchain-core==1.2.11 +langchain==1.2.10 +langchain-core==1.2.13 langchain-community==0.4.1 -langchain-openai==1.1.7 -langchain-anthropic==1.3.2 +langchain-openai==1.1.9 +langchain-anthropic==1.3.3 pydantic==2.12.5 requests==2.32.5