diff --git a/packages/core/src/orchestrator/prompt-builder.test.ts b/packages/core/src/orchestrator/prompt-builder.test.ts index 5927857dfb..33b89090f2 100644 --- a/packages/core/src/orchestrator/prompt-builder.test.ts +++ b/packages/core/src/orchestrator/prompt-builder.test.ts @@ -30,6 +30,42 @@ describe('buildRoutingRulesWithProject', () => { expect(rules).toContain('NO knowledge of the conversation history'); }); + + test('includes workflow slash-command catalog so the orchestrator stops hallucinating answers', () => { + // Regression: the orchestrator used to answer "you can't abandon a failed + // run, it's already terminal" when a user typed `workflow abandon X` + // without the leading slash. The agent had no information about which + // workflow slash-commands exist or what they do, so it guessed from + // training-time defaults. We now ship an authoritative catalog in the + // system prompt and instruct the agent to redirect to slash-prefix + // rather than answer from speculation. + const rules = buildRoutingRulesWithProject(); + + expect(rules).toContain('## Workflow Slash Commands'); + // Catalog-completeness: every documented subcommand must be present so the + // catalog stays the authoritative source the prompt claims it is. If any + // entry drifts out, the agent will silently fall back to training-time + // speculation for that command — exactly the failure mode this PR fixes. + // Pragmatic source-of-truth: this list mirrors the case statements in + // command-handler.ts:handleWorkflowCommand. When a new subcommand is added + // there, it should also be added to the prompt catalog (and to this list). + const expectedSubcommands = [ + 'run', + 'resume', + 'abandon', + 'list', + 'status', + 'cancel', + 'approve', + 'reject', + ]; + for (const cmd of expectedSubcommands) { + expect(rules).toContain(`/workflow ${cmd}`); + } + // The hard rule: don't speculate about workflow internals. 
+ expect(rules).toContain('Do not invent rules about which statuses'); + expect(rules).toContain('with the leading slash'); + }); }); describe('formatWorkflowContextSection', () => { diff --git a/packages/core/src/orchestrator/prompt-builder.ts b/packages/core/src/orchestrator/prompt-builder.ts index 07a3a7a709..b813d6d740 100644 --- a/packages/core/src/orchestrator/prompt-builder.ts +++ b/packages/core/src/orchestrator/prompt-builder.ts @@ -132,7 +132,41 @@ To update a project's path: To remove a registered project: /remove-project {project-name} -IMPORTANT: Always clone into ~/.archon/workspaces/{owner}/{repo}/source unless the user specifies a different location.`; +IMPORTANT: Always clone into ~/.archon/workspaces/{owner}/{repo}/source unless the user specifies a different location. + +## Workflow Slash Commands + +Users have access to slash commands for direct workflow control. These bypass +you (the orchestrator) and run through the command parser. They MUST start with +a literal \`/\`. If a user types something that looks like one of these commands +without the leading slash (e.g. "workflow abandon abc123", "workflow run X"), +do NOT answer the question yourself based on guesses about workflow internals +— suggest they re-type with a leading slash. The command parser is the +authoritative source of truth, not your training data. + +Available slash commands: + +- \`/workflow run <name> "<message>"\` — run a workflow directly. +- \`/workflow resume <run-id>\` — resume a failed or paused run from where it stopped. +- \`/workflow abandon <run-id>\` — discard a workflow run (transitions it to + \`cancelled\`). Works on \`failed\`, \`paused\`, \`running\`, and \`pending\` runs. + Already-cancelled or already-completed runs cannot be abandoned. +- \`/workflow list\` — show available workflows. +- \`/workflow status\` — show all active workflow runs. +- \`/workflow cancel\` — cancel a running workflow. +- \`/workflow approve <run-id> [comment]\` — approve a paused (gate) run. 
+- \`/workflow reject <run-id> [reason]\` — reject a paused (gate) run. + +Example of a slash-prefix-missing case: +User: "workflow abandon 32c786ef8c68d3263a80ca9d9d463f3f" +Correct response: "Try \`/workflow abandon 32c786ef8c68d3263a80ca9d9d463f3f\` +(with the leading slash) — that runs the actual command. The chat agent can't +execute workflow operations directly, only the slash-command parser can." + +Do not invent rules about which statuses can or can't transition. If the user +is unsure whether an operation will work, the right answer is "try the slash +command and see what it reports" — never speculate about terminal-status +restrictions or other workflow internals.`; } /**