diff --git a/.github/scripts/conflict_detector.js b/.github/scripts/conflict_detector.js
index 12ab605f..148c8244 100644
--- a/.github/scripts/conflict_detector.js
+++ b/.github/scripts/conflict_detector.js
@@ -19,6 +19,34 @@ const IGNORED_CONFLICT_FILES = [
   'residual-trend-history.ndjson',
 ];
 
+// Author logins treated as automation; their comments may mention "conflict" but should not block execution.
+const IGNORED_COMMENT_AUTHORS = new Set([
+  'github-actions[bot]',
+  'github-merge-queue[bot]',
+  'dependabot[bot]',
+  'github',
+]);
+// Body substrings that also mark a comment as ignorable (matched with String.prototype.includes).
+const IGNORED_COMMENT_MARKERS = [
+  '<!-- keepalive-state',
+  'keepalive-loop-summary',
+  'auto-status-summary',
+];
+
+/** True when `comment` is automation noise: bot/ignored author, or an ignored marker in its body. */
+function isIgnoredComment(comment) {
+  if (!comment) {
+    return false;
+  }
+  const { user, body } = comment;
+  const login = user?.login || '';
+  return (
+    user?.type === 'Bot' ||
+    IGNORED_COMMENT_AUTHORS.has(login) ||
+    IGNORED_COMMENT_MARKERS.some((marker) => (body || '').includes(marker))
+  );
+}
+
 /**
  * Check if a file should be excluded from conflict detection.
  * @param {string} filename - File path to check
@@ -223,8 +251,10 @@ async function checkCommentsForConflicts(github, context, prNumber) {
       per_page: 20,
     });
 
-    // Check recent comments (last 10)
-    const recentComments = comments.slice(-10);
+    // Check recent comments (last 10) and ignore bot/system noise
+    const recentComments = comments
+      .filter((comment) => !isIgnoredComment(comment))
+      .slice(-10);
 
     for (const comment of recentComments) {
       for (const pattern of CONFLICT_PATTERNS) {
diff --git a/.github/scripts/error_classifier.js b/.github/scripts/error_classifier.js
index 8bf50b4d..18f89c2c 100644
--- a/.github/scripts/error_classifier.js
+++ b/.github/scripts/error_classifier.js
@@ -188,6 +188,9 @@ function classifyByMessage(message) {
 
 function classifyError(error) {
   const message = normaliseMessage(error);
+  const preview = message ? message.slice(0, 50) : 'unknown';
+  // eslint-disable-next-line no-console
+  console.log(`[error_classifier] Classifying error: ${preview}`);
   const status = getStatusCode(error);
 
   const statusCategory = status ? classifyByStatus(status, message) : null;
diff --git a/.github/scripts/parse_chatgpt_topics.py b/.github/scripts/parse_chatgpt_topics.py
index 6fae0dd7..42056ffd 100755
--- a/.github/scripts/parse_chatgpt_topics.py
+++ b/.github/scripts/parse_chatgpt_topics.py
@@ -119,6 +119,11 @@ def _parse_sections(
         "tasks": {"tasks"},
         "acceptance_criteria": {"acceptance criteria", "acceptance criteria."},
         "implementation_notes": {
+            "admin access",
+            "admin requirement",
+            "admin requirements",
+            "dependencies",
+            "dependency",
             "implementation notes",
             "implementation note",
             "notes",
diff --git a/.github/workflows/agents-auto-label.yml b/.github/workflows/agents-auto-label.yml
index 8908fb7d..a4f9734a 100644
--- a/.github/workflows/agents-auto-label.yml
+++ b/.github/workflows/agents-auto-label.yml
@@ -27,20 +27,16 @@ jobs:
       !contains(github.event.issue.labels.*.name, 'automated')
 
     steps:
-      - name: Checkout Workflows repo
-        uses: actions/checkout@v6
-        with:
-          repository: stranske/Workflows
-          path: workflows-repo
+      - name: Checkout repository
+        uses: actions/checkout@v4
 
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: "3.12"
+          python-version: "3.11"
 
       - name: Install dependencies
         run: |
-          cd workflows-repo
           pip install -e ".[langchain]" --quiet
 
       - name: Get repo labels
@@ -74,8 +70,8 @@ jobs:
           LABELS_JSON: ${{ steps.get-labels.outputs.labels_json }}
           ISSUE_TITLE: ${{ github.event.issue.title }}
           ISSUE_BODY: ${{ github.event.issue.body }}
+          PYTHONPATH: ${{ github.workspace }}
         run: |
-          cd workflows-repo
           python3 << 'PYTHON_SCRIPT'
           import json
           import os
@@ -135,6 +131,16 @@ jobs:
           auto_apply = [m for m in matches if m.score >= auto_threshold]
           suggestions = [m for m in matches if suggest_threshold <= m.score < auto_threshold]
 
+          # IMPORTANT: Only auto-apply the BEST matching label, not all above threshold
+          # This prevents over-labeling issues with multiple labels like bug+enhancement
+          if auto_apply:
+              # auto_apply keeps the order of matches (noted as sorted by score descending)
+              best_match, demoted = auto_apply[0], auto_apply[1:]
+              auto_apply = [best_match]
+              # Move the other high-confidence matches to the front of suggestions,
+              # preserving their order (repeated insert(0, ...) would reverse it)
+              suggestions = [m for m in demoted if m not in suggestions] + suggestions
+
           print(f"Auto-apply labels ({auto_threshold}+ confidence):")
           for m in auto_apply:
               print(f"  - {m.label.name}: {m.score:.2%}")
@@ -144,15 +150,15 @@ jobs:
               print(f"  - {m.label.name}: {m.score:.2%}")
 
           # Output results
-          auto_labels = json.dumps([m.label.name for m in auto_apply])
-          suggest_json = json.dumps([
-              {'name': m.label.name, 'score': f'{m.score:.0%}'}
-              for m in suggestions
-          ])
           with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
               f.write('has_suggestions=true\n')
-              f.write(f'auto_apply_labels={auto_labels}\n')
-              f.write(f'suggested_labels={suggest_json}\n')
+              auto_json = json.dumps([m.label.name for m in auto_apply])
+              f.write(f'auto_apply_labels={auto_json}\n')
+              sugg_data = [
+                  {"name": m.label.name, "score": f"{m.score:.0%}"}
+                  for m in suggestions
+              ]
+              f.write(f'suggested_labels={json.dumps(sugg_data)}\n')
 
           PYTHON_SCRIPT
 
@@ -220,14 +226,14 @@ jobs:
             body += `${suggestions}\n\n`;
 
             if (autoApplied.length > 0) {
-              const appliedStr = autoApplied.map(l => `\`${l}\``).join(', ');
-              body += `**Auto-applied:** ${appliedStr}\n\n`;
+              const applied = autoApplied.map(l => `\`${l}\``).join(', ');
+              body += `**Auto-applied:** ${applied}\n\n`;
             }
 
             body += `<details>\n<summary>How to use these suggestions</summary>\n\n`;
             body += `- Click the label name in the sidebar to add it\n`;
-            const ghCmd = `gh issue edit ${context.issue.number} --add-label "label-name"`;
-            body += `- Or use the GitHub CLI: \`${ghCmd}\`\n`;
+            const editCmd = `gh issue edit ${context.issue.number} --add-label "label-name"`;
+            body += `- Or use the GitHub CLI: \`${editCmd}\`\n`;
             body += `</details>\n\n`;
             body += `---\n*Auto-generated by label matcher*`;
 
diff --git a/.github/workflows/agents-auto-pilot.yml b/.github/workflows/agents-auto-pilot.yml
new file mode 100644
index 00000000..2b7bb27d
--- /dev/null
+++ b/.github/workflows/agents-auto-pilot.yml
@@ -0,0 +1,725 @@
+# See docs/ci/AGENTS_POLICY.md for guardrails and override process.
+name: Agents Auto-Pilot
+
+# End-to-end automation: Issue → Optimize → Agent → Keepalive → Merge
+# Triggered by agents:auto-pilot label, orchestrates the full pipeline
+# Auto-continues when prep workflow labels are added (format, optimize, apply-suggestions)
+
+on:
+  issues:
+    types: [labeled, closed]
+  pull_request:
+    types: [labeled, closed]
+  workflow_dispatch:
+    inputs:
+      issue_number:
+        description: "Issue number to auto-pilot"
+        required: true
+        type: number
+      force_step:
+        description: "Force a specific step (optional, leave as 'auto' for normal flow)"
+        required: false
+        type: choice
+        options:
+          - auto
+          - format
+          - optimize
+          - apply
+          - capability-check
+          - agent
+          - verify
+
+permissions:
+  contents: read
+  issues: write
+  pull-requests: write  # Needed to create PRs automatically
+  actions: write  # Needed for workflow re-dispatch
+
+env:
+  # Safety limits
+  MAX_CYCLES: 10
+  MAX_WALL_TIME_HOURS: 4
+
+jobs:
+  auto-pilot:
+    runs-on: ubuntu-latest
+    timeout-minutes: 240  # 4 hours = MAX_WALL_TIME_HOURS
+    # Trigger on:
+    # 1. agents:auto-pilot label added (initial trigger)
+    # 2. Prep workflow labels added (auto-continue after each step)
+    # 3. Issue closed (verification trigger)
+    # 4. workflow_dispatch (manual trigger)
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      (github.event.action == 'labeled' && (
+        github.event.label.name == 'agents:auto-pilot' ||
+        github.event.label.name == 'agents:format' ||
+        github.event.label.name == 'agents:optimize' ||
+        github.event.label.name == 'agents:apply-suggestions' ||
+        github.event.label.name == 'agent:codex'
+      )) ||
+      (github.event.action == 'closed' &&
+       contains(github.event.issue.labels.*.name, 'agents:auto-pilot'))
+
+    steps:
+      - name: Check if auto-pilot is enabled
+        id: check_enabled
+        uses: actions/github-script@v7
+        with:
+          script: |
+            // Manual dispatch is always enabled; other events need the agents:auto-pilot label
+            if (context.eventName === 'workflow_dispatch') {
+              core.setOutput('enabled', 'true');
+              return;
+            }
+
+            // Label that fired this event ('' when the payload carries no label)
+            const triggerLabel = context.payload.label?.name || '';
+
+            // Labels on the issue, or on the PR when the payload has no issue
+            const subject = context.payload.issue || context.payload.pull_request;
+            const currentLabels = subject ? subject.labels.map(l => l.name) : [];
+
+            // Enabled when the label is already present or is the one being added
+            const AUTO_PILOT = 'agents:auto-pilot';
+            const enabled =
+              currentLabels.includes(AUTO_PILOT) || triggerLabel === AUTO_PILOT;
+            if (!enabled) {
+              core.info(`Skipping: auto-pilot not enabled (trigger: ${triggerLabel})`);
+            }
+
+            // Later steps gate on this output
+            core.setOutput('enabled', enabled ? 'true' : 'false');
+
+      - name: Checkout repository
+        if: steps.check_enabled.outputs.enabled == 'true'
+        uses: actions/checkout@v4
+
+      - name: Determine context
+        if: steps.check_enabled.outputs.enabled == 'true'
+        id: context
+        uses: actions/github-script@v7
+        with:
+          script: |
+            let issueNumber, issue, pr;
+
+            // Resolve the issue number: dispatch input, issue payload, or the PR body
+            if (context.eventName === 'workflow_dispatch') {
+              issueNumber = parseInt('${{ inputs.issue_number }}');
+            } else if (context.payload.issue) {
+              issueNumber = context.payload.issue.number;
+              issue = context.payload.issue;
+            } else if (context.payload.pull_request) {
+              // For PR events, the first "#<digits>" in the PR body is taken as the linked issue
+              pr = context.payload.pull_request;
+              const bodyMatch = pr.body?.match(/#(\d+)/);
+              issueNumber = bodyMatch ? parseInt(bodyMatch[1]) : null;
+            }
+
+            if (!issueNumber) {
+              core.setFailed('Could not determine issue number');
+              return;
+            }
+
+            // Fetch issue if not in payload (dispatch and PR events)
+            if (!issue) {
+              const { data } = await github.rest.issues.get({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: issueNumber
+              });
+              issue = data;
+            }
+
+            const labels = issue.labels.map(l => l.name);
+
+            // Check for pause label (stops here with should_continue=false, reason=paused)
+            if (labels.includes('agents:auto-pilot-pause')) {
+              core.info('Auto-pilot paused by agents:auto-pilot-pause label');
+              core.setOutput('should_continue', 'false');
+              core.setOutput('reason', 'paused');
+              return;
+            }
+
+            // Check for failure/needs-human (stops here with reason=needs-human)
+            if (labels.includes('needs-human') || labels.includes('agents:auto-pilot-failed')) {
+              core.info('Auto-pilot stopped: requires human intervention');
+              core.setOutput('should_continue', 'false');
+              core.setOutput('reason', 'needs-human');
+              return;
+            }
+
+            // Determine current state from the labels present on the issue
+            const hasFormat = labels.includes('agents:format');
+            const hasOptimize = labels.includes('agents:optimize');
+            const hasApplySuggestions = labels.includes('agents:apply-suggestions');
+            const hasAgentCodex = labels.includes('agent:codex');
+            const hasAutofix = labels.includes('autofix');
+            const hasAutomerge = labels.includes('automerge');
+            const hasVerify = labels.includes('verify:evaluate');
+
+            // Look for a linked PR in the issue timeline (all pages are fetched)
+            const timelineEvents = await github.paginate(
+              github.rest.issues.listEventsForTimeline,
+              {
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: issueNumber,
+                per_page: 100
+              }
+            );
+
+            let linkedPR = null;
+            for (const event of timelineEvents) {
+              // Accept cross-referenced and connected events whose source is a PR; the last match wins
+              if ((event.event === 'cross-referenced' || event.event === 'connected') &&
+                  event.source?.issue?.pull_request) {
+                linkedPR = event.source.issue.number;
+              }
+            }
+
+            core.setOutput('issue_number', issueNumber);
+            core.setOutput('issue_title', issue.title);
+            core.setOutput('issue_state', issue.state);
+            core.setOutput('should_continue', 'true');
+            core.setOutput('has_format', hasFormat.toString());
+            core.setOutput('has_optimize', hasOptimize.toString());
+            core.setOutput('has_apply', hasApplySuggestions.toString());
+            core.setOutput('has_agent', hasAgentCodex.toString());
+            core.setOutput('has_autofix', hasAutofix.toString());
+            core.setOutput('has_automerge', hasAutomerge.toString());
+            core.setOutput('has_verify', hasVerify.toString());
+            core.setOutput('linked_pr', linkedPR || '');  // '' when no PR was found
+
+            console.log(`Issue #${issueNumber} state:`);
+            console.log(`  State: ${issue.state}`);
+            console.log(`  Labels: ${labels.join(', ')}`);
+            console.log(`  Linked PR: ${linkedPR || 'none'}`);
+
+      - name: Check step count
+        if: steps.context.outputs.should_continue == 'true'
+        id: cycles
+        uses: actions/github-script@v7
+        env:
+          ISSUE_NUMBER: ${{ steps.context.outputs.issue_number }}
+        with:
+          script: |
+            const issueNumber = parseInt(process.env.ISSUE_NUMBER, 10);
+
+            // Get all comments to count auto-pilot steps (with pagination)
+            const allComments = await github.paginate(
+              github.rest.issues.listComments,
+              {
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: issueNumber,
+                per_page: 100
+              }
+            );
+            // Progress comments are posted as "🤖 **Auto-pilot step N**: ...", so match that exact prefix
+            const stepComments = allComments.filter(c =>
+              typeof c.body === 'string' && c.body.includes('🤖 **Auto-pilot step')
+            );
+
+            const stepCount = stepComments.length;
+            const maxCycles = parseInt('${{ env.MAX_CYCLES }}', 10);
+
+            // At the limit: report it and leave 'count' unset; the next step stops the run
+            if (stepCount >= maxCycles) {
+              core.warning(`Auto-pilot exceeded max steps (${stepCount}/${maxCycles})`);
+              core.setOutput('exceeded', 'true');
+              return;
+            }
+            core.setOutput('exceeded', 'false');
+            core.setOutput('count', stepCount.toString());
+
+      - name: Stop if exceeded
+        if: steps.cycles.outputs.exceeded == 'true'
+        uses: actions/github-script@v7
+        env:
+          ISSUE_NUMBER: ${{ steps.context.outputs.issue_number }}
+        with:
+          script: |
+            const issueRef = {
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: parseInt(process.env.ISSUE_NUMBER)
+            };
+
+            // Mark the issue for human attention before posting the explanation
+            await github.rest.issues.addLabels({
+              ...issueRef,
+              labels: ['needs-human', 'agents:auto-pilot-failed']
+            });
+
+            await github.rest.issues.createComment({
+              ...issueRef,
+              body: `## ⚠️ Auto-Pilot Stopped
+
+            **Reason:** Exceeded maximum cycle limit (${{ env.MAX_CYCLES }} cycles)
+
+            This issue requires human review. Possible causes:
+            - Repeated CI failures
+            - Conflicting requirements
+            - External dependencies
+
+            **To resume:** Remove \`agents:auto-pilot-failed\` and \`needs-human\`
+            labels, then re-add \`agents:auto-pilot\`.`
+            });
+
+            core.setFailed('Auto-pilot exceeded max cycles');
+
+      - name: Determine next step
+        if: |
+          steps.context.outputs.should_continue == 'true' &&
+          steps.cycles.outputs.exceeded != 'true'
+        id: next
+        env:
+          FORCE_STEP: ${{ inputs.force_step }}
+        run: |
+          # Decision order (first match wins):
+          # 1. FORCE_STEP set and not 'auto' → emit it unchanged
+          # 2. Issue closed: HAS_VERIFY true → done, otherwise → verify
+          # 3. No linked PR and HAS_AGENT true → create-pr
+          # 4. No linked PR, HAS_FORMAT not true → format
+          # 5. No linked PR, HAS_OPTIMIZE not true → optimize
+          # 6. No linked PR, HAS_APPLY not true → apply; all three present → capability-check
+          # 7. Linked PR exists → monitor-pr
+
+          ISSUE_STATE="${{ steps.context.outputs.issue_state }}"
+          HAS_FORMAT="${{ steps.context.outputs.has_format }}"
+          HAS_OPTIMIZE="${{ steps.context.outputs.has_optimize }}"
+          HAS_APPLY="${{ steps.context.outputs.has_apply }}"
+          HAS_AGENT="${{ steps.context.outputs.has_agent }}"
+          LINKED_PR="${{ steps.context.outputs.linked_pr }}"
+          HAS_VERIFY="${{ steps.context.outputs.has_verify }}"
+
+          # Force step if specified (not 'auto'); empty on non-dispatch events
+          if [[ -n "$FORCE_STEP" && "$FORCE_STEP" != "auto" ]]; then
+            echo "next_step=$FORCE_STEP" >> "$GITHUB_OUTPUT"
+            echo "Forced step: $FORCE_STEP"
+            exit 0
+          fi
+
+          # Issue closed = done or verify
+          if [[ "$ISSUE_STATE" == "closed" ]]; then
+            if [[ "$HAS_VERIFY" == "true" ]]; then
+              echo "next_step=done" >> "$GITHUB_OUTPUT"
+              echo "Issue closed with verification - auto-pilot complete"
+            else
+              echo "next_step=verify" >> "$GITHUB_OUTPUT"
+              echo "Issue closed - triggering verification"
+            fi
+            exit 0
+          fi
+
+          # No PR yet - agent label wins, otherwise walk the prep labels in order
+          if [[ -z "$LINKED_PR" ]]; then
+            if [[ "$HAS_AGENT" == "true" ]]; then
+              echo "next_step=create-pr" >> "$GITHUB_OUTPUT"
+              echo "Agent assigned, checking for branch to create PR"
+            elif [[ "$HAS_FORMAT" != "true" ]]; then
+              echo "next_step=format" >> "$GITHUB_OUTPUT"
+              echo "Step 1: Format issue"
+            elif [[ "$HAS_OPTIMIZE" != "true" ]]; then
+              echo "next_step=optimize" >> "$GITHUB_OUTPUT"
+              echo "Step 2: Optimize issue"
+            elif [[ "$HAS_APPLY" != "true" ]]; then
+              echo "next_step=apply" >> "$GITHUB_OUTPUT"
+              echo "Step 3: Apply suggestions"
+            else
+              echo "next_step=capability-check" >> "$GITHUB_OUTPUT"
+              echo "Step 4: Run capability check before agent"
+            fi
+            exit 0
+          fi
+
+          # Has PR - let keepalive/autofix handle it
+          echo "next_step=monitor-pr" >> "$GITHUB_OUTPUT"
+          echo "PR #$LINKED_PR exists - monitoring via keepalive"
+
+      - name: Execute step - Format
+        if: steps.next.outputs.next_step == 'format'
+        uses: actions/github-script@v7
+        env:
+          ISSUE_NUMBER: ${{ steps.context.outputs.issue_number }}
+          STEP_COUNT: ${{ steps.cycles.outputs.count }}
+        with:
+          script: |
+            const previousSteps = parseInt(process.env.STEP_COUNT || '0');
+            const issueRef = {
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: parseInt(process.env.ISSUE_NUMBER)
+            };
+
+            // Post the progress comment before adding the label
+            await github.rest.issues.createComment({
+              ...issueRef,
+              body: `🤖 **Auto-pilot step ${previousSteps + 1}**: Starting issue formatting...
+
+            Adding \`agents:format\` label to trigger LangChain formatting.`
+            });
+
+            // Add format label to trigger the formatter
+            await github.rest.issues.addLabels({
+              ...issueRef,
+              labels: ['agents:format']
+            });
+
+      - name: Execute step - Optimize
+        if: steps.next.outputs.next_step == 'optimize'
+        uses: actions/github-script@v7
+        env:
+          ISSUE_NUMBER: ${{ steps.context.outputs.issue_number }}
+          STEP_COUNT: ${{ steps.cycles.outputs.count }}
+        with:
+          script: |
+            const previousSteps = parseInt(process.env.STEP_COUNT || '0');
+            const issueRef = {
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: parseInt(process.env.ISSUE_NUMBER)
+            };
+
+            await github.rest.issues.createComment({
+              ...issueRef,
+              body: `🤖 **Auto-pilot step ${previousSteps + 1}**: Analyzing issue for improvements...
+
+            Adding \`agents:optimize\` label to trigger analysis.`
+            });
+
+            await github.rest.issues.addLabels({
+              ...issueRef,
+              labels: ['agents:optimize']
+            });
+
+      - name: Execute step - Apply suggestions
+        if: steps.next.outputs.next_step == 'apply'
+        uses: actions/github-script@v7
+        env:
+          ISSUE_NUMBER: ${{ steps.context.outputs.issue_number }}
+          STEP_COUNT: ${{ steps.cycles.outputs.count }}
+        with:
+          script: |
+            const previousSteps = parseInt(process.env.STEP_COUNT || '0');
+            const issueRef = {
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: parseInt(process.env.ISSUE_NUMBER)
+            };
+
+            await github.rest.issues.createComment({
+              ...issueRef,
+              body: `🤖 **Auto-pilot step ${previousSteps + 1}**: Applying optimization suggestions...
+
+            Adding \`agents:apply-suggestions\` label.`
+            });
+
+            await github.rest.issues.addLabels({
+              ...issueRef,
+              labels: ['agents:apply-suggestions']
+            });
+
+      - name: Execute step - Capability check & Agent
+        # 'agent' is produced only by the force_step input and shares this handler
+        if: |
+          steps.next.outputs.next_step == 'capability-check' ||
+          steps.next.outputs.next_step == 'agent'
+        uses: actions/github-script@v7
+        env:
+          ISSUE_NUMBER: ${{ steps.context.outputs.issue_number }}
+          STEP_COUNT: ${{ steps.cycles.outputs.count }}
+        with:
+          script: |
+            const issueNumber = parseInt(process.env.ISSUE_NUMBER, 10);
+            const stepCount = parseInt(process.env.STEP_COUNT || '0', 10) + 1;
+            const issueRef = { owner: context.repo.owner, repo: context.repo.repo, issue_number: issueNumber };
+
+            await github.rest.issues.createComment({
+              ...issueRef,
+              body: `🤖 **Auto-pilot step ${stepCount}**: Issue prepared! Assigning to agent...
+
+            Adding \`agent:codex\` label. The capability check will run automatically.
+
+            ⏳ Agent will create a PR shortly.`
+            });
+
+            // Add agent label - capability check triggers on this
+            await github.rest.issues.addLabels({
+              ...issueRef,
+              labels: ['agent:codex']
+            });
+
+      - name: Execute step - Verify
+        if: steps.next.outputs.next_step == 'verify'
+        uses: actions/github-script@v7
+        env:
+          ISSUE_NUMBER: ${{ steps.context.outputs.issue_number }}
+        with:
+          script: |
+            const issueNumber = parseInt(process.env.ISSUE_NUMBER);
+
+            // Find the merged PR for this issue among closed PRs, most recently updated first.
+            // Only one page is requested, so at most 100 closed PRs are searched.
+            const { data: prs } = await github.rest.pulls.list({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              state: 'closed',
+              sort: 'updated',
+              direction: 'desc',
+              per_page: 100
+            });
+
+            // Helper to detect explicit closing/fixing references to this issue
+            function matchesIssueReference(text, num) {
+              if (!text) return false;
+              // Match: closes #123, fixes #123, resolves #123 (and variations), case-insensitive
+              const pattern = new RegExp(
+                `\\b(close[sd]?|fixe?[sd]?|resolve[sd]?)\\s+#${num}\\b`, 'i'
+              );
+              return pattern.test(text);
+            }
+
+            // Find the first merged PR whose body has the meta marker or whose body/title closes this issue
+            const linkedPr = prs.find(pr =>
+              pr.merged_at && (
+                pr.body?.includes(`meta:issue:${issueNumber}`) ||
+                matchesIssueReference(pr.body, issueNumber) ||
+                matchesIssueReference(pr.title, issueNumber)
+              )
+            );
+
+            // No merged PR found: note it on the issue, label the issue only, and stop
+            if (!linkedPr) {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: issueNumber,
+                body: `🤖 **Auto-pilot**: Issue closed but couldn't find linked merged PR.
+
+            Adding \`verify:evaluate\` label to issue for tracking.`
+              });
+
+              await github.rest.issues.addLabels({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: issueNumber,
+                labels: ['verify:evaluate']
+              });
+              return;
+            }
+
+            core.info(`Found merged PR #${linkedPr.number} for issue #${issueNumber}`);
+
+            const verifyMsg = [
+              `🤖 **Auto-pilot**: Issue closed.`,
+              `Triggering verification on PR #${linkedPr.number}...`,
+              '',
+              'Adding `verify:evaluate` label to PR.'
+            ].join('\n');
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: issueNumber,
+              body: verifyMsg
+            });
+
+            // Add verify label to the merged PR (this triggers agents-verifier.yml)
+            await github.rest.issues.addLabels({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: linkedPr.number,
+              labels: ['verify:evaluate']
+            });
+
+            // Also add to issue for tracking
+            await github.rest.issues.addLabels({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: issueNumber,
+              labels: ['verify:evaluate']
+            });
+
+      - name: Execute step - Create PR
+        if: steps.next.outputs.next_step == 'create-pr'
+        uses: actions/github-script@v7
+        env:
+          ISSUE_NUMBER: ${{ steps.context.outputs.issue_number }}
+          ISSUE_TITLE: ${{ steps.context.outputs.issue_title }}
+          STEP_COUNT: ${{ steps.cycles.outputs.count }}
+        with:
+          script: |
+            const issueNumber = parseInt(process.env.ISSUE_NUMBER, 10);
+            const issueTitle = process.env.ISSUE_TITLE || `Issue #${issueNumber}`;
+            const stepCount = parseInt(process.env.STEP_COUNT || '0', 10) + 1;
+            const branchName = `codex/issue-${issueNumber}`;
+
+            // Check if branch exists (404 means "not yet"; any other error is rethrown)
+            let branchExists = false;
+            try {
+              await github.rest.repos.getBranch({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                branch: branchName
+              });
+              branchExists = true;
+              core.info(`Branch ${branchName} exists`);
+            } catch (e) {
+              if (e.status === 404) {
+                core.info(`Branch ${branchName} does not exist yet`);
+              } else {
+                throw e;
+              }
+            }
+
+            if (!branchExists) {
+              // Branch not created yet - agent still working
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: issueNumber,
+                body: `🤖 **Auto-pilot step ${stepCount}**: Waiting for agent to create branch...
+
+            The agent has been assigned but hasn't created the branch yet.
+            Branch expected: \`${branchName}\`
+
+            ⏳ Auto-pilot will check again on next trigger.`
+              });
+              return;
+            }
+
+            // Branch exists - create PR
+            core.info(`Creating PR from ${branchName}`);
+
+            // Fetch issue body to include in PR
+            const { data: issue } = await github.rest.issues.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: issueNumber
+            });
+
+            const prTitle = `[Auto-pilot] ${issueTitle}`;
+            const prBody = [
+              `<!-- meta:issue:${issueNumber} -->`,
+              '',
+              `Closes #${issueNumber}`,
+              '',
+              issue.body || ''
+            ].join('\n');
+
+            try {
+              const { data: pr } = await github.rest.pulls.create({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                title: prTitle,
+                head: branchName,
+                base: context.payload.repository?.default_branch || 'main',  // repo default branch, else 'main'
+                body: prBody
+              });
+
+              core.info(`Created PR #${pr.number}`);
+
+              // Add standard agent labels to the PR (separate try-catch to not fail PR creation)
+              let labelsAdded = false;
+              try {
+                await github.rest.issues.addLabels({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: pr.number,
+                  labels: ['agent:codex', 'agents:keepalive', 'autofix']
+                });
+                labelsAdded = true;
+                core.info(`Added agent labels to PR #${pr.number}`);
+              } catch (labelError) {
+                const errMsg = labelError?.message || String(labelError);
+                core.warning(`Failed to add labels to PR #${pr.number}: ${errMsg}`);
+              }
+
+              const labelStatus = labelsAdded
+                ? '✅ Added labels: `agent:codex`, `agents:keepalive`, `autofix`'
+                : '⚠️ Could not add labels (add manually)';
+
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: issueNumber,
+                body: `🤖 **Auto-pilot step ${stepCount}**: PR created!
+
+            ✅ Created PR #${pr.number} from branch \`${branchName}\`
+            ${labelStatus}
+
+            The PR will now go through CI checks. Auto-pilot will continue monitoring.`
+              });
+
+            } catch (e) {
+              if (e.status === 422 && e.message?.includes('already exists')) {
+                core.info('PR already exists - this is fine');
+              } else {
+                // PR creation failed - report but don't fail workflow
+                core.warning(`Failed to create PR: ${e.message}`);
+                await github.rest.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: issueNumber,
+                  body: `🤖 **Auto-pilot step ${stepCount}**: Could not create PR
+
+            ⚠️ Branch \`${branchName}\` exists but PR creation failed.
+
+            Error: ${e.message}
+
+            Please create the PR manually or check permissions.`
+                });
+              }
+            }
+
+      - name: Report - Monitoring PR
+        if: steps.next.outputs.next_step == 'monitor-pr'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const prNumber = '${{ steps.context.outputs.linked_pr }}';  // linked_pr output of the context step
+            core.info(`PR #${prNumber} exists. Keepalive and autofix will handle CI.`);  // log only
+
+      - name: Report - Done
+        if: steps.next.outputs.next_step == 'done'
+        uses: actions/github-script@v7
+        env:
+          ISSUE_NUMBER: ${{ steps.context.outputs.issue_number }}
+        with:
+          script: |
+            const issueRef = {
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: parseInt(process.env.ISSUE_NUMBER)
+            };
+
+            // Pipeline finished: drop the auto-pilot label (best effort, never fails the step)
+            try {
+              await github.rest.issues.removeLabel({
+                ...issueRef,
+                name: 'agents:auto-pilot'
+              });
+            } catch (e) {
+              // A 404 only means the label was already gone; anything else is worth a warning
+              if (e?.status !== 404) {
+                core.warning(`Unexpected error removing auto-pilot label: ${e?.message || e}`);
+              } else {
+                core.info('Auto-pilot label already removed or not found');
+              }
+            }
+
+            await github.rest.issues.createComment({
+              ...issueRef,
+              body: `## ✅ Auto-Pilot Complete
+
+            This issue has been fully processed:
+            - ✅ Issue formatted and optimized
+            - ✅ Agent assigned and PR created
+            - ✅ PR merged
+            - ✅ Verification triggered
+
+            Thank you for using auto-pilot! 🚀`
+            });
+
+            core.info('Auto-pilot complete!');
diff --git a/.github/workflows/agents-dedup.yml b/.github/workflows/agents-dedup.yml
index 6be508f6..5820afbd 100644
--- a/.github/workflows/agents-dedup.yml
+++ b/.github/workflows/agents-dedup.yml
@@ -14,8 +14,9 @@ permissions:
 
 env:
   # Similarity threshold for flagging duplicates (0.0-1.0)
-  # 0.85 = very similar, reduces false positives
-  SIMILARITY_THRESHOLD: "0.85"
+  # 0.92 = very high similarity required, reduces false positives from
+  # issues in the same domain/feature area that share vocabulary
+  SIMILARITY_THRESHOLD: "0.92"
 
 jobs:
   dedup:
@@ -24,20 +25,16 @@ jobs:
     if: github.event.issue.user.type != 'Bot'
 
     steps:
-      - name: Checkout Workflows repo
-        uses: actions/checkout@v6
-        with:
-          repository: stranske/Workflows
-          path: workflows-repo
+      - name: Checkout repository
+        uses: actions/checkout@v4
 
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: "3.12"
+          python-version: "3.11"
 
       - name: Install dependencies
         run: |
-          cd workflows-repo
           pip install -e ".[langchain]" --quiet
 
       - name: Get open issues
@@ -79,11 +76,10 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          PYTHONPATH: ${{ github.workspace }}/workflows-repo
+          PYTHONPATH: ${{ github.workspace }}
           NEW_ISSUE_TITLE: ${{ github.event.issue.title }}
           NEW_ISSUE_BODY: ${{ github.event.issue.body }}
         run: |
-          cd workflows-repo
           python -c "
           import json
           import os
@@ -97,7 +93,7 @@ jobs:
           )
 
           # Load open issues
-          with open('../open_issues.json') as f:
+          with open('open_issues.json') as f:
               issues_data = json.load(f)
 
           if not issues_data:
@@ -127,9 +123,31 @@ jobs:
           new_body = os.environ.get('NEW_ISSUE_BODY', '')
           query = f'{new_title}\n\n{new_body}'
 
-          threshold = float(os.environ.get('SIMILARITY_THRESHOLD', '0.85'))
+          threshold = float(os.environ.get('SIMILARITY_THRESHOLD', '0.92'))
           matches = find_similar_issues(store, query, threshold=threshold, k=3)
 
+          # Additional filter: require title similarity for true duplicates
+          # This reduces false positives from issues in the same domain/feature area
+          # that share vocabulary but are different tasks
+          filtered_matches = []
+          new_title_lower = new_title.lower().strip()
+          for m in matches:
+              match_title_lower = m.issue.title.lower().strip()
+              # Check for significant title overlap
+              title_words_new = set(new_title_lower.split())
+              title_words_match = set(match_title_lower.split())
+              shared_words = title_words_new.intersection(title_words_match)
+              # Keep a match when score >= 0.95 or shared words are >= 40% of the longer title
+              max_words = max(len(title_words_new), len(title_words_match), 1)
+              overlap_ratio = len(shared_words) / max_words
+              if m.score >= 0.95 or overlap_ratio >= 0.4:
+                  filtered_matches.append(m)
+                  print(f'  Match #{m.issue.number}: {m.score:.0%}, overlap={overlap_ratio:.0%}')
+              else:
+                  print(f'  Skip #{m.issue.number}: {m.score:.0%}, overlap={overlap_ratio:.0%}')
+
+          matches = filtered_matches
+
           if not matches:
               print('No duplicates found above threshold')
               with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
@@ -149,7 +167,7 @@ jobs:
               f.write(f'duplicate_count={len(duplicates)}\n')
 
           # Write to file for GitHub script
-          with open('../duplicates.json', 'w') as f:
+          with open('duplicates.json', 'w') as f:
               json.dump(duplicates, f)
 
           print(f'Found {len(duplicates)} potential duplicates:')
diff --git a/.github/workflows/agents-verifier.yml b/.github/workflows/agents-verifier.yml
index 1eaeb104..37bfce60 100644
--- a/.github/workflows/agents-verifier.yml
+++ b/.github/workflows/agents-verifier.yml
@@ -38,23 +38,20 @@ on:
         default: 'evaluate'
       model:
         description: >-
-          Model for evaluation. GitHub Models: gpt-4o, gpt-4o-mini,
-          text-embedding-3-large, text-embedding-3-small, Meta-Llama-3.1-405B-Instruct,
-          Meta-Llama-3.1-70B-Instruct, Meta-Llama-3-70B-Instruct |
-          OpenAI: gpt-5.2, o1, o1-preview, o1-mini, o3-mini (if available), gpt-4o, gpt-4o-mini,
-          gpt-4-turbo, gpt-4, gpt-3.5-turbo. Use script scripts/update_model_list.sh
-          to check current availability.
+          Model for evaluation. GitHub Models: gpt-4o (default), Mistral-large-2407,
+          Meta-Llama-3.1-405B-Instruct | OpenAI (requires key): o1, gpt-5.2.
+          For stricter evaluation, use compare mode with different model families.
         required: false
         type: string
-        default: 'gpt-4o-mini'
+        default: 'gpt-4o'
       model2:
         description: >-
-          Second model for compare mode. High quality options:
-          GitHub Models: gpt-4o, Meta-Llama-3.1-405B-Instruct |
-          OpenAI: gpt-5.2, o1, gpt-4o, gpt-4-turbo. Efficient: gpt-4o-mini, o1-mini
+          Second model for compare mode (cross-provider verification).
+          Default: Mistral-large-2407 (GitHub Models) paired with gpt-5.2 (OpenAI).
+          Using different providers ensures diverse evaluation perspectives.
         required: false
         type: string
-        default: ''
+        default: 'Mistral-large-2407'
       provider:
         description: 'LLM provider (OpenAI requires OPENAI_API_KEY secret)'
         required: true
@@ -156,12 +153,13 @@ jobs:
             core.info(`Verifier triggered with mode: ${mode}`);
             core.setOutput('should_run', 'true');
             core.setOutput('mode', mode);
-            // For compare mode, use high-quality models from different providers
+            // For compare mode, use models from different families/providers
             if (mode === 'compare') {
-              core.setOutput('model', 'gpt-4o');   // GitHub Models - current flagship
-              core.setOutput('model2', 'gpt-5.2'); // OpenAI - GPT-5.2
+              // gpt-5.2 (OpenAI) + Mistral-large (GitHub Models) for cross-provider comparison
+              core.setOutput('model', 'gpt-5.2');
+              core.setOutput('model2', 'Mistral-large-2407');
             } else {
-              core.setOutput('model', '');  // Use default
+              core.setOutput('model', '');  // Use default (gpt-4o)
               core.setOutput('model2', '');
             }
             core.setOutput('provider', '');  // Use default
diff --git a/scripts/langchain/issue_formatter.py b/scripts/langchain/issue_formatter.py
index c527299f..d89d64d2 100755
--- a/scripts/langchain/issue_formatter.py
+++ b/scripts/langchain/issue_formatter.py
@@ -309,6 +309,62 @@ def _append_raw_issue_section(formatted: str, issue_body: str) -> str:
     return f"{formatted.rstrip()}{details}\n"
 
 
+def _extract_tasks_from_formatted(body: str) -> list[str]:
+    """Collect the text of ``LIST_ITEM_REGEX`` lines under the ``## Tasks`` heading.
+
+    Returns ``[]`` when there is no such heading. Empty items and the
+    ``_Not provided._`` placeholder are left out.
+    """
+    header = "## Tasks"
+    lines = body.splitlines()
+    start = next((n for n, row in enumerate(lines) if row.strip() == header), None)
+    if start is None:
+        return []
+    # The section runs until the next "## " heading other than the Tasks header.
+    stop = len(lines)
+    for n in range(start + 1, len(lines)):
+        if lines[n].startswith("## ") and lines[n].strip() != header:
+            stop = n
+            break
+    collected: list[str] = []
+    for row in lines[start + 1 : stop]:
+        found = LIST_ITEM_REGEX.match(row) if row.strip() else None
+        if found is None:
+            continue
+        indent, _, remainder = found.groups()
+        if indent.strip():
+            continue
+        text = remainder.strip()
+        marker = CHECKBOX_REGEX.match(text)
+        if marker:
+            text = marker.group(2).strip()
+        if text and text != "_Not provided._":
+            collected.append(text)
+    return collected
+
+
+
+def _apply_task_decomposition(formatted: str, *, use_llm: bool) -> str:
+    """Hand per-task split suggestions to ``issue_optimizer``; no-op when there are none."""
+    tasks = _extract_tasks_from_formatted(formatted)
+    if not tasks:
+        return formatted
+
+    from scripts.langchain import task_decomposer
+
+    suggestions: list[dict[str, Any]] = [
+        {"task": task, "split_suggestions": parts}
+        for task in tasks
+        if (parts := task_decomposer.decompose_task(task, use_llm=use_llm).get("sub_tasks"))
+    ]
+    if not suggestions:
+        return formatted
+
+    from scripts.langchain import issue_optimizer
+
+    return issue_optimizer._apply_task_decomposition(formatted, {"task_splitting": suggestions})
+
+
 def format_issue_body(issue_body: str, *, use_llm: bool = True) -> dict[str, Any]:
     if not issue_body:
         issue_body = ""
@@ -327,6 +383,7 @@ def format_issue_body(issue_body: str, *, use_llm: bool = True) -> dict[str, Any
                 content = getattr(response, "content", None) or str(response)
                 formatted = content.strip()
                 if _formatted_output_valid(formatted):
+                    formatted = _apply_task_decomposition(formatted, use_llm=use_llm)
                     formatted = _append_raw_issue_section(formatted, issue_body)
                     return {
                         "formatted_body": formatted,
@@ -337,7 +394,9 @@ def format_issue_body(issue_body: str, *, use_llm: bool = True) -> dict[str, Any
                 # Fall through to fallback if LLM fails (import, auth, API errors)
                 pass
 
-    formatted = _append_raw_issue_section(_format_issue_fallback(issue_body), issue_body)
+    formatted = _format_issue_fallback(issue_body)
+    formatted = _apply_task_decomposition(formatted, use_llm=use_llm)
+    formatted = _append_raw_issue_section(formatted, issue_body)
     return {
         "formatted_body": formatted,
         "provider_used": None,
diff --git a/tools/llm_provider.py b/tools/llm_provider.py
index 2db6c931..08bd23e6 100644
--- a/tools/llm_provider.py
+++ b/tools/llm_provider.py
@@ -31,7 +31,9 @@
 
 # GitHub Models API endpoint (OpenAI-compatible)
 GITHUB_MODELS_BASE_URL = "https://models.inference.ai.azure.com"
-DEFAULT_MODEL = "gpt-4o-mini"
+# Use gpt-4o for evaluation - best available on GitHub Models
+# gpt-4o-mini was too lenient and passed obvious deficiencies
+DEFAULT_MODEL = "gpt-4o"
 
 
 def _setup_langsmith_tracing() -> bool: