diff --git a/.github/scripts/__tests__/keepalive-orchestrator-gate-runner.test.js b/.github/scripts/__tests__/keepalive-orchestrator-gate-runner.test.js
index 388588945..010765629 100644
--- a/.github/scripts/__tests__/keepalive-orchestrator-gate-runner.test.js
+++ b/.github/scripts/__tests__/keepalive-orchestrator-gate-runner.test.js
@@ -231,7 +231,7 @@ test('runKeepaliveGate skips when keepalive is paused by label', async () => {
   const { runKeepaliveGate, restore } = loadRunnerWithGate(gateStub);
 
   const pr = makePullRequest({
-    labels: ['agents:pause', 'agents:keepalive', 'agent:codex'],
+    labels: ['agents:paused', 'agents:keepalive', 'agent:codex'],
   });
 
   await runKeepaliveGate({
diff --git a/.github/scripts/keepalive_gate.js b/.github/scripts/keepalive_gate.js
index b7d529503..1a001f22d 100644
--- a/.github/scripts/keepalive_gate.js
+++ b/.github/scripts/keepalive_gate.js
@@ -7,7 +7,7 @@ const AGENT_LABEL_PREFIX = 'agent:';
 const MAX_RUNS_PREFIX = 'agents:max-runs:';
 const SYNC_REQUIRED_LABEL = 'agents:sync-required';
 const ACTIVATED_LABEL = 'agents:activated';
-const PAUSE_LABEL = 'agents:pause';
+const PAUSE_LABEL = 'agents:paused';
 const DEFAULT_RUN_CAP = 1;
 const MIN_RUN_CAP = 1;
 const MAX_RUN_CAP = 5;
diff --git a/.github/scripts/keepalive_orchestrator_gate_runner.js b/.github/scripts/keepalive_orchestrator_gate_runner.js
index 5c7226354..58b7bcfad 100644
--- a/.github/scripts/keepalive_orchestrator_gate_runner.js
+++ b/.github/scripts/keepalive_orchestrator_gate_runner.js
@@ -162,9 +162,9 @@ async function runKeepaliveGate({ core, github, context, env }) {
         .filter(Boolean)
     );
 
-    if (currentLabels.has('agents:pause')) {
+    if (currentLabels.has('agents:paused')) {
       addReason('keepalive-paused');
-      summary.addRaw('Keepalive paused by agents:pause label.').addEOL();
+      summary.addRaw('Keepalive paused by agents:paused label.').addEOL();
     }
 
     const requiredLabels = ['agents:keepalive'];
diff --git a/CLAUDE.md b/CLAUDE.md
index e2f7c8901..af1cb15a1 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -162,7 +162,7 @@ The nested job 'job_name' is requesting 'contents: write', but is only allowed '
 - Check PR has `agent:codex` label
 - Check Gate workflow passed
 - Check PR body has unchecked tasks in Automated Status Summary
-- Check `agents:pause` label is NOT present
+- Check `agents:paused` label is NOT present
 
 ## Documentation Index
 
diff --git a/docs/keepalive/GoalsAndPlumbing.md b/docs/keepalive/GoalsAndPlumbing.md
index 8441b1c89..1b16da168 100644
--- a/docs/keepalive/GoalsAndPlumbing.md
+++ b/docs/keepalive/GoalsAndPlumbing.md
@@ -75,7 +75,7 @@ If any requirement fails, keepalive stays silent—no PR comments. Operators may
 ## 4. Pause & Stop Controls
 
 - Removing the `agent:*` label halts new dispatches until a label is re-applied and all guardrails pass again.
-- Respect the stronger `agents:pause` label, which blocks *all* keepalive activity.
+- Respect the `agents:paused` label, which blocks *all* keepalive activity.
 - After repeated failures (default: 3), the loop pauses and adds `needs-human` label.
 
 **To resume after failure:**
diff --git a/docs/plans/langchain-post-code-rollout.md b/docs/plans/langchain-post-code-rollout.md
index 51979c846..c5db10b12 100644
--- a/docs/plans/langchain-post-code-rollout.md
+++ b/docs/plans/langchain-post-code-rollout.md
@@ -1,8 +1,8 @@
 # LangChain Post-Code Production Capabilities - Evaluation & Rollout Plan
 
 > **Date:** January 7, 2026  
-> **Status:** Phase 1 & 2 Deployed - Active in Production  
-> **Last Validation:** 2026-01-07 (Post-Sync Cleanup)  
+> **Status:** Phase 4 Planning - Full Automation Design  
+> **Last Validation:** 2026-01-07 (Phase 4 Planning Added)  
 
 ---
 
@@ -75,7 +75,7 @@
    - ✅ Travel-Plan-Permission (synced 2026-01-07)
    - ✅ Portable-Alpha-Extension-Model (synced 2026-01-07)
    - ✅ Trend_Model_Project (synced 2026-01-07)
-   - ⚠️ Collab-Admin (sync PR #113 pending - has lint failures)
+   - ✅ Collab-Admin (synced 2026-01-07, PR #113 merged)
 
 2. **Format Labels** - All 7 consumer repos have `agents:format`, `agents:formatted`, `agents:optimize`, `agents:apply-suggestions`:
    - ✅ Manager-Database (tested live - issue #184, synced 2026-01-07)
@@ -84,7 +84,7 @@
    - ✅ Travel-Plan-Permission (synced 2026-01-07)
    - ✅ Portable-Alpha-Extension-Model (synced 2026-01-07)
    - ✅ Trend_Model_Project (synced 2026-01-07)
-   - ⚠️ Collab-Admin (sync PR #113 pending - has lint failures)
+   - ✅ Collab-Admin (synced 2026-01-07, PR #113 merged)
 
 3. **Updated .gitignore** - Consumer repos have old partial version, missing new entries for:
    - `verifier-diff-summary.md`
@@ -135,11 +135,29 @@
 - Issue body updated with AGENT_ISSUE_TEMPLATE format
 - `agents:formatted` label added after successful formatting
 
-### Phase 3 Target: Advanced Features (Optional)
+### Phase 3 Target: Pre-Agent Intelligence (4 Capabilities)
 
-- `capability_check.py` integrated into issue intake OR archived
-- `task_decomposer.py` integrated for large issues OR archived
-- Dedup/semantic matching for issue triage OR archived
+**3A. Capability Check (Pre-Agent Gate)**
+- `capability_check.py` runs before `agent:codex` assignment
+- Identifies issues agent cannot complete (external deps, out-of-scope, credentials needed)
+- **Supplements** `agents:optimize` workflow (quality check) with feasibility check
+- Adds `needs-human` label + explanation when agent cannot proceed
+
+**3B. Task Decomposition (Large Issue Handling)**
+- `task_decomposer.py` auto-splits issues with 5+ implied tasks
+- Creates linked sub-issues or checklist within parent issue
+- Triggers via `agents:decompose` label (new)
+
+**3C. Duplicate Detection (Issue Triage)**
+- `issue_dedup.py` checks new issues against open issues
+- Posts warning comment if duplicate detected (>85% similarity)
+- Creates link to potential duplicate for human review
+- **Testing focus:** Validate false positive rate before auto-closing
+
+**3D. Semantic Label Matching (Auto-Labeling)**
+- `label_matcher.py` suggests appropriate labels based on issue content
+- Posts comment with label suggestions or auto-applies if confidence >90%
+- Uses `semantic_matcher.py` for embedding-based similarity
 
 ---
 
@@ -159,12 +177,11 @@
 - [x] Commit any fixes to main
 
 **Step 1B: Deploy to Consumer Repos**
-1. ✅ All consumer repos have verifier labels (6/7 active, Collab-Admin pending)
+1. ✅ All consumer repos have verifier labels (7/7 - all synced)
 2. ✅ Sync workflow runs automatically on template changes
 3. ✅ **Major cleanup completed 2026-01-07:**
    - 26 superseded sync PRs closed across 5 consumer repos
-   - 5 most recent sync PRs merged successfully
-   - Collab-Admin PR #113 blocked by lint failures (Python CI / lint-ruff)
+   - 6 most recent sync PRs merged successfully (including Collab-Admin PR #113)
    - **Bot Comment Analysis:** Reviewed 40+ comments across sync PRs
      - **Finding:** Zero substantive code review comments from Copilot/Codex agent bots
      - All comments were keepalive/autofix operational noise (status updates, missing-issue warnings)
@@ -213,7 +230,7 @@
 **Step 2A: Labels & Sync**
 1. ✅ Labels created via sync workflow (`agents:format`, `agents:formatted`, `agents:optimize`, `agents:apply-suggestions`)
 2. ✅ `agents-issue-optimizer.yml` is in sync manifest
-3. ✅ Sync PRs merged (5/6 repos as of 2026-01-07, Collab-Admin pending)
+3. ✅ Sync PRs merged (7/7 repos as of 2026-01-07, all synced)
 4. ✅ **Tested on Manager-Database #184:**
    - ✅ Created unstructured test issue
    - ✅ Added `agents:optimize` label → Workflow posted valuable analysis (8.6/10 quality)
@@ -237,21 +254,140 @@
   - ✅ Labels updated correctly (`agents:formatted` added)
   - ✅ **Updated:** Now uses `use_llm=True` to populate sections from analysis - pending retest
 
-### Phase 3: Archive Unused Scripts (1 Step)
+### Phase 3: Pre-Agent Intelligence (4 Steps)
+
+**Status: Planning - Test Cycle Defined**
+
+**Step 3A: Capability Check Integration**
+
+1. **Relationship to existing workflows:**
+   - `agents:optimize` → "Is this issue well-written?" (quality check)
+   - `capability_check.py` → "Can the agent DO this?" (feasibility gate)
+   - **Answer:** Supplements optimizer, runs BEFORE agent assignment on Issues
+
+2. **Proposed workflow integration:**
+   ```
+   Issue Created → agents:optimize (quality) → agents:apply-suggestions (format)
+                                                        ↓
+   User adds agent:codex → capability_check.py runs → If NOT capable:
+                                                        → Add needs-human label
+                                                        → Post blocker explanation
+                                                      If capable:
+                                                        → Proceed with agent
+   ```
+
+3. **Implementation tasks:**
+   - [ ] Create `agents-capability-check.yml` workflow
+   - [ ] Add `needs-human` label to consumer repos via sync
+   - [ ] Trigger on `agent:codex` label added OR new workflow label
+   - [ ] Post comment explaining blockers when agent cannot proceed
+
+**Step 3B: Task Decomposition**
+
+1. **Implementation tasks:**
+   - [ ] Create `agents-decompose.yml` workflow
+   - [ ] Add `agents:decompose` label to label sync config
+   - [ ] Call `task_decomposer.py` when label applied
+   - [ ] Output: Either create sub-issues OR add checklist to parent
+
+**Step 3C: Duplicate Detection (Testing Focus)**
 
-**Status: Decision Deferred**
+1. **Critical concern:** False positives - we don't want to close valid issues
+2. **Approach:** Comment-only mode first, no auto-close
+3. **Implementation tasks:**
+   - [ ] Create `agents-dedup.yml` workflow
+   - [ ] Trigger on issue opened
+   - [ ] Compare against open issues using embeddings
+   - [ ] Post comment if >85% similarity detected (link to potential duplicate)
+   - [ ] Track false positive rate over testing period
 
-These scripts are fully tested (145 tests passing) but not yet integrated:
-- `capability_check.py` - Pre-flight check for agent capability on tasks
-- `task_decomposer.py` - Break large tasks into smaller actionable items  
-- `issue_dedup.py` - Detect duplicate issues via embeddings
-- `label_matcher.py` - Semantic label matching
-- `semantic_matcher.py` - Shared embedding utilities
+4. **Testing metrics to track:**
+   - True positive rate (correctly identified duplicates)
+   - False positive rate (target: <5%)
+   - Human override rate (user keeps both issues open)
 
-**Recommendation:** Keep & Document for future Phase 3+ integration
-- All scripts have full test coverage
-- Semantic matching could enhance issue triage
-- Capability check could prevent failed agent attempts
+**Step 3D: Semantic Label Matching**
+
+1. **Implementation tasks:**
+   - [ ] Create `agents-auto-label.yml` workflow OR integrate into existing
+   - [ ] Use `label_matcher.py` for semantic similarity
+   - [ ] Post comment with suggestions OR auto-apply at >90% confidence
+
+---
+
+## Phase 3 Testing Plan (Manager-Database)
+
+**Test Repository:** Manager-Database
+**Test Duration:** 2 weeks (7 issues minimum)
+**Start Date:** Ready to begin (all consumer repos synced)
+
+### Test Issue #1: Capability Check Validation
+
+**Purpose:** Validate capability_check.py correctly identifies agent blockers
+
+**Test Scenarios:**
+1. **Issue requiring external API** - Should flag "needs credentials/external dependency"
+2. **Issue requiring database migration** - Should flag "needs infrastructure/manual step"
+3. **Normal code-only issue** - Should pass capability check
+
+**Test Issue Ideas for Manager-Database:**
+- "Integrate with external payment API" (should fail - external dep)
+- "Add database migration for new schema" (should fail - infra)
+- "Refactor logging module" (should pass - code only)
+
+### Test Issue #2: Task Decomposition Validation
+
+**Purpose:** Validate task_decomposer.py produces useful sub-tasks
+
+**Test Scenario:**
+- Create large issue with 5+ implied tasks
+- Apply `agents:decompose` label
+- Verify sub-tasks are actionable and correctly scoped
+
+**Test Issue Idea:**
+- "Implement comprehensive health check endpoint with retry logic, circuit breaker, metrics, and alerting integration"
+
+### Test Issue #3: Duplicate Detection Validation
+
+**Purpose:** Measure false positive rate for issue_dedup.py
+
+**Test Scenarios:**
+1. **True duplicate** - Create issue very similar to existing (should detect)
+2. **Related but different** - Create issue in same area but different ask (should NOT flag)
+3. **Unrelated** - Create issue in different area (should NOT flag)
+
+**Success Criteria:**
+- True positives detected: 100%
+- False positive rate: <5%
+- Clear explanation in comment linking to potential duplicate
+
+### Test Issue #4: Label Matching Validation
+
+**Purpose:** Validate label_matcher.py suggests correct labels
+
+**Test Scenario:**
+- Create unlabeled issues in different categories
+- Verify label suggestions match expected labels
+- Track suggestion accuracy
+
+### Test Issues Created (Manager-Database)
+
+| Issue | Purpose | Expected Result |
+|-------|---------|-----------------|
+| #193 | Capability Check - External service (Stripe) | ❌ SHOULD FAIL - requires Stripe API credentials, webhook endpoint |
+| #194 | Task Decomposition - Large issue (10 tasks) | ✅ SHOULD DECOMPOSE - into 3-5 sub-issues or checklist |
+| #196 | Duplicate Detection - Similar to #133 | ⚠️ SHOULD DETECT - ~85%+ similarity to "Add GET Endpoint for Managers List" |
+
+### Testing Metrics Dashboard
+
+| Script | Test Issues | True Positives | False Positives | Accuracy | Status |
+|--------|-------------|----------------|-----------------|----------|--------|
+| capability_check.py | #193 (1/3) | - | - | - | 🔄 Testing |
+| task_decomposer.py | #194 (1/2) | - | - | - | 🔄 Testing |
+| issue_dedup.py | #196 (1/3) | - | - | <5% target | 🔄 Testing |
+| label_matcher.py | 0/3 | - | - | - | ⏳ Pending |
+
+**Total test issues created:** 3/11 on Manager-Database
 
 ---
 
@@ -259,11 +395,25 @@ These scripts are fully tested (145 tests passing) but not yet integrated:
 
 | Phase | Scope | Steps | Test Repo | Status |
 |-------|-------|-------|-----------|--------|
-| 1 | PR Verification | 2 | Manager-Database | ✅ Deployed, 5/6 repos synced |
+| 1 | PR Verification | 2 | Manager-Database | ✅ Deployed, 7/7 repos synced |
 | 2 | Issue Formatting | 1 | Manager-Database | ✅ Deployed & tested - Quality: 7.5/10 |
-| 3 | Cleanup/Archive | 1 | N/A | Deferred (scripts retained) |
+| 3 | Pre-Agent Intelligence | 4 | Manager-Database | 🔄 Testing - 3/11 test issues created |
+| 4 | Full Automation & Cleanup | 5 | Manager-Database | 📋 Planning |
 
-**Total: 4 deployment actions** - All infrastructure deployed. Major sync cleanup completed 2026-01-07 (26 superseded PRs closed, 5/6 repos synced). Collab-Admin PR #113 blocked by lint failures.
+**Phase 3 Components:**
+- **3A:** Capability Check - Pre-agent feasibility gate (supplements agents:optimize)
+- **3B:** Task Decomposition - Auto-split large issues
+- **3C:** Duplicate Detection - Comment-only mode, track false positives
+- **3D:** Semantic Labeling - Auto-suggest/apply labels
+
+**Phase 4 Components:**
+- **4A:** Label Cleanup - Remove bloat, standardize across repos
+- **4B:** User Guide - Operational documentation for label system
+- **4C:** Auto-Pilot Label - End-to-end issue-to-merge automation
+- **4D:** Conflict Resolution - Automated merge conflict handling in keepalive
+- **4E:** Verify-to-Issue - Create follow-up issues from verification feedback
+
+**Total: 12 deployment actions** - Phases 1-2 deployed. Phases 3-4 in planning/testing.
 
 **Substantive Quality Assessment:**
 - **agents:optimize:** 8.6/10 - Provides valuable, actionable analysis
@@ -277,8 +427,8 @@ These scripts are fully tested (145 tests passing) but not yet integrated:
 ### Immediate (Ready Now)
 1. ~~**Merge PR #633**~~ ✅ Merged - GPT-5.2 for compare mode
 2. ~~**Merge PR #643**~~ ✅ Merged - Model name in comparison reports + disable auto-issue creation
-3. ~~**Consumer repo sync cleanup**~~ ✅ Completed 2026-01-07 - 26 superseded PRs closed, 5/6 merged
-4. **Resolve Collab-Admin sync** - ⏳ PR #113 blocked by lint failures (Python CI / lint-ruff)
+3. ~~**Consumer repo sync cleanup**~~ ✅ Completed 2026-01-07 - 26 superseded PRs closed, 6/6 merged
+4. ~~**Resolve Collab-Admin sync**~~ ✅ PR #113 merged 2026-01-07
 5. ~~**Live test `agents:optimize`**~~ ✅ Tested on Manager-Database #184 - Quality: 8.6/10
 6. ~~**Live test `agents:apply-suggestions`**~~ ✅ Tested on Manager-Database #184 - Quality: 6/10
 
@@ -294,11 +444,632 @@ These scripts are fully tested (145 tests passing) but not yet integrated:
    - "Implement logging before health checks"
    - "Retry logic blocks enhanced error logging"
 
+### Phase 3 Implementation (Next)
+1. **Step 3A: Capability Check** - Create `agents-capability-check.yml`, integrate with issue workflow
+   - Supplements existing agents:optimize (quality) with feasibility gate
+   - Runs BEFORE agent assignment, not after
+2. **Step 3B: Task Decomposition** - Create `agents-decompose.yml` workflow
+3. **Step 3C: Duplicate Detection** - Create `agents-dedup.yml` (comment-only, track false positives)
+4. **Step 3D: Label Matching** - Integrate into issue workflow
+
 ### Future Enhancements
 1. **Compare mode refinement** - Currently uses gpt-4o (GitHub) vs gpt-5.2 (OpenAI)
 2. **Model auto-update** - Use `scripts/update_model_list.sh` periodically
 3. **Domain-specific guidance** - Add prompts for retry patterns, health check endpoints
-4. **Phase 3 scripts** - Decide on capability_check.py and task_decomposer.py integration
+
+---
+
+## Phase 4: Full Automation & Cleanup (5 Initiatives)
+
+> **Status:** Planning  
+> **Goal:** Streamline end-to-end automation from issue to merged PR
+
+### 4A. Label Cleanup & Standardization
+
+**Problem:** Consumer repos have accumulated label bloat (30+ labels in some repos), including many unused or redundant labels such as `stage 0`, `codex`, and `ai:agent`.
+
+**Idiosyncratic Repo Bloat Strategy:**
+Each consumer repo has accumulated repo-specific labels (e.g., `architecture`, `backend`, `cli`, `config`, `data`, `engine`, `app`) that aren't synced and aren't used by automation. These create visual clutter and confusion about which labels have functional effects.
+
+**Cleanup approach:**
+1. **Audit each repo** - List all labels not in canonical set
+2. **Classify** - Determine if repo-specific label is:
+   - Used in repo-specific workflows (keep)
+   - Used for human categorization (optional keep - user choice)
+   - Unused/obsolete (remove)
+3. **Create per-repo cleanup PR** - With list of labels to remove and justification
+4. **Human approval required** - Repo maintainer reviews and approves before execution
+
+**Functional Labels (Keep - Have Workflow Effects):**
+
+| Label | Trigger | Applies To | Used By |
+|-------|---------|------------|---------|
+| `agent:codex` | Issue intake, keepalive | Issues, PRs | `agents-issue-intake.yml`, `agents-keepalive-loop.yml` |
+| `agent:claude` | Issue intake (future) | Issues, PRs | `agents-issue-intake.yml` |
+| `agent:copilot` | Issue intake (future) | Issues, PRs | `agents-issue-intake.yml` |
+| `agent:needs-attention` | Auto-applied when stuck | Issues, PRs | Multiple workflows |
+| `agents` (bare) | Issue template auto-label | Issues | `agent_task.yml` template |
+| `agents:format` | Direct formatting | Issues | `agents-issue-optimizer.yml` |
+| `agents:formatted` | Auto-applied after format | Issues | `agents-issue-optimizer.yml` |
+| `agents:optimize` | Analyze + suggest | Issues | `agents-issue-optimizer.yml` |
+| `agents:apply-suggestions` | Apply suggestions | Issues | `agents-issue-optimizer.yml` |
+| `agents:allow-change` | Override agents-guard | PRs | `agents-guard.yml` |
+| `agents:keepalive` | Enable keepalive loop | PRs | `agents-keepalive-loop.yml` |
+| `agents:activated` | Track first human activation | PRs | `agents_pr_meta_keepalive.js` |
+| `agents:paused` | Pause keepalive | PRs | `keepalive_gate.js`, `keepalive-runner.js` |
+| `autofix` | Trigger autofix | PRs | `autofix.yml` |
+| `autofix:clean` | Aggressive autofix | PRs | `autofix.yml` |
+| `autofix:bot-comments` | Address bot comments | PRs | `agents-bot-comment-handler.yml` |
+| `autofix:applied` | Auto-applied | PRs | Autofix workflows |
+| `automerge` | Enable auto-merge | PRs | `merge_manager.js`, `agents_belt_scan.js` |
+| `from:codex` | Track PR origin | PRs | `merge_manager.js` |
+| `from:copilot` | Track PR origin | PRs | `merge_manager.js` |
+| `risk:low` | Low-risk for auto-approve | PRs | `merge_manager.js` |
+| `ci:green` | CI status tracking | PRs | `merge_manager.js` |
+| `codex-ready` | Ready for Codex | Issues | Issue templates |
+| `verify:checkbox` | Checkbox verification | PRs (merged) | `agents-verifier.yml` |
+| `verify:evaluate` | LLM evaluation | PRs (merged) | `agents-verifier.yml` |
+| `verify:compare` | Multi-model comparison | PRs (merged) | `agents-verifier.yml` |
+| `needs-human` | Human intervention needed | Issues, PRs | Multiple workflows |
+| `sync` | From sync workflow | PRs | Sync workflows |
+| `automated` | Bot-created | Issues, PRs | Multiple workflows |
+| `coverage` | Coverage issue tracking | Issues | `maint-coverage-guard.yml` |
+
+**Informational Labels (Keep - Useful Categorization):**
+
+| Label | Purpose |
+|-------|---------|
+| `bug` | Bug reports |
+| `enhancement` | Feature requests |
+| `documentation` | Doc changes |
+| `duplicate` | Duplicate tracking |
+| `wontfix` | Won't address |
+
+**Labels to Remove (Verified No Functional Effect):**
+
+| Label | Reason | Searched | Result |
+|-------|--------|----------|--------|
+| `codex` (bare) | Redundant with `agent:codex` | ✅ | No workflow triggers on this |
+| `agents:pause` | Redundant with `agents:paused` | ✅ | Consolidated to `agents:paused` |
+| `ai:agent` | Redundant | ✅ | Zero matches in codebase |
+| `auto-merge-audit` | Unused | ✅ | Zero matches in codebase |
+| `automerge:ok` | Unused variant | ✅ | Zero matches in codebase |
+
+**⚠️ CORRECTED from initial analysis:** The following labels ARE functional and should NOT be removed:
+- `agents` (bare) - Used by issue templates
+- `agents:activated` - Tracks human activation state
+- `agents:paused` - Controls keepalive pausing (consolidated from agents:pause)
+- `automerge` - Enables auto-merge in merge_manager.js
+- `from:codex` / `from:copilot` - Used by merge_manager.js for origin tracking
+- `risk:low` / `ci:green` / `codex-ready` - Used by merge_manager and issue templates
+
+**Implementation:**
+- [ ] Create `scripts/cleanup_labels.py` to remove ONLY verified bloat labels
+- [ ] Audit each consumer repo for idiosyncratic labels
+- [ ] Create per-repo cleanup PR with human approval gate
+- [ ] Update `docs/LABELS.md` with canonical label list
+- [ ] Add label validation to sync workflow
+
+### 4B. Workflow User Guide Document
+
+> **Status:** Deferred until initiatives 4A and 4C–4E are complete
+
+**Problem:** Users don't know how to use the label system effectively.
+
+**Solution:** Create `docs/WORKFLOW_USER_GUIDE.md` with:
+
+1. **Quick Start** - Most common workflows with copy-paste examples
+2. **Issue Creation Flow** - Step-by-step from idea to formatted issue
+3. **PR Automation Flow** - How labels progress a PR to merge
+4. **Label Decision Tree** - "What label should I add?"
+5. **Troubleshooting** - Common issues and solutions
+6. **Optional: Issue Creation from Doc** - Command/workflow to create issue from guide sections
+
+**Sections:**
+
+```markdown
+## Creating an Agent-Ready Issue
+
+1. Create issue with rough description
+2. Add `agents:optimize` label → Review suggestions
+3. Add `agents:apply-suggestions` label → Issue formatted
+4. Add `agent:codex` label → Agent starts work
+
+## Monitoring Agent Progress
+
+- Check PR for `agent:needs-attention` label
+- Review keepalive comments for status
+- Add `autofix` if CI failing on simple issues
+
+## Post-Merge Verification
+
+- Add `verify:evaluate` after merge for LLM review
+- Add `verify:compare` for multi-model comparison
+```
+
+**Optional Issue Creation Feature:**
+```markdown
+## Quick Issue from Guide
+
+At end of each workflow section, include:
+- "Create issue to implement this" link
+- Pre-populated with section content as template
+- Links back to guide for context
+```
+
+**Implementation:**
+- [ ] Create `docs/WORKFLOW_USER_GUIDE.md`
+- [ ] Add to sync-manifest.yml
+- [ ] Add prominent link in each repo's README
+- [ ] Consider GitHub wiki integration
+- [ ] **Optional:** Add issue creation links per section
+
+### 4C. Master Automation Label (`agents:auto-pilot`)
+
+**Goal:** Single label for complete issue-to-merged-PR automation.
+
+**Proposed Flow:**
+
+```
+User adds `agents:auto-pilot` to issue
+          ↓
+Step 1: agents:format (initial structure)
+          ↓
+Step 2: agents:optimize → agents:apply-suggestions
+          ↓
+Step 3: capability_check.py runs
+          ↓ (if capable)
+Step 4: agent:codex applied → PR created
+          ↓
+Step 5: autofix + agents:keepalive applied to PR
+          ↓
+Step 6: Gate passes + acceptance criteria met
+          ↓
+Step 7: Auto-merge (if enabled + all checks pass)
+          ↓
+Step 8: verify:evaluate on merged PR
+```
+
+**Feasibility Analysis:**
+
+| Step | Challenge | Mitigation |
+|------|-----------|------------|
+| Sequential labels | GitHub doesn't support chained label triggers | Use workflow_dispatch between steps |
+| Race conditions | Multiple workflows competing | Concurrency groups + state tracking |
+| Error handling | What if step fails? | Add `agents:auto-pilot-failed` + comment explaining failure |
+| User expectations | Users expect instant completion | Post progress comments at each step |
+| Rollback | What if we need to stop? | `agents:auto-pilot-pause` label |
+
+**Major Risks:**
+1. **Runaway automation** - Agent creates bad PR, auto-merges, creates more issues
+   - Mitigation: Max iterations, human approval gates for auto-merge
+2. **CI instability** - Flaky tests block automation indefinitely
+   - Mitigation: Timeout after N keepalive cycles, escalate to `needs-human`
+3. **Token exhaustion** - Long sessions burn through LLM quota
+   - Mitigation: Per-issue token budget tracking
+
+**Implementation:**
+- [ ] Design state machine for auto-pilot flow
+- [ ] Create `agents-auto-pilot.yml` orchestrator workflow
+- [ ] Add progress tracking comments
+- [ ] Implement failure handling and rollback
+- [ ] Add `agents:auto-pilot-pause` for manual intervention
+- [ ] Test on Manager-Database with controlled issues
+
+### 4D. Conflict Resolution in Keepalive
+
+**Problem:** The most common reason keepalive stalls is merge conflicts. Agents handle conflicts well when prompted, but the current pipeline doesn't automatically detect or respond to them.
+
+**Current State:**
+- Keepalive detects "Gate failed" but doesn't distinguish conflict from test failure
+- Agent eventually addresses conflicts but wastes cycles
+
+**Full Implementation Plan:**
+
+**Step 1: Conflict Detection Module**
+Create `scripts/conflict_detector.js`:
+```javascript
+// Detect merge conflicts via GitHub's mergeable_state and CI logs
+async function detectConflicts(github, context, prNumber) {
+  // Method 1: Check GitHub's mergeable_state
+  const pr = await github.rest.pulls.get({
+    owner: context.repo.owner,
+    repo: context.repo.repo,
+    pull_number: prNumber
+  });
+  
+  if (pr.data.mergeable_state === 'dirty') {
+    return { hasConflict: true, source: 'github-api' };
+  }
+  
+  // Method 2: Parse CI logs for conflict markers
+  const runs = await github.rest.actions.listWorkflowRunsForRepo({...});
+  // Look for: "CONFLICT", "merge conflict", "Automatic merge failed"
+  
+  return { hasConflict: false };
+}
+```
+
+**Step 2: Update Keepalive Gate**
+Modify `keepalive_gate.js`:
+```javascript
+// After gate failure, check if conflict
+const conflictResult = await detectConflicts(github, context, prNumber);
+if (conflictResult.hasConflict) {
+  core.setOutput('skip_reason', 'merge-conflict');
+  core.setOutput('conflict_files', (conflictResult.files || []).join(', '));
+  // Trigger conflict-specific prompt
+  return;
+}
+```
+
+**Step 3: Conflict Resolution Prompt**
+Create `.github/codex/prompts/fix_merge_conflicts.md`:
+```markdown
+# Task: Resolve Merge Conflicts
+
+This PR has merge conflicts that need to be resolved.
+
+## Conflict Files
+{{conflict_files}}
+
+## Instructions
+1. Fetch the latest changes from main/master branch
+2. Identify and resolve each conflict, keeping the intent of both changes
+3. Run tests to ensure resolution doesn't break functionality
+4. Commit with message: "fix: resolve merge conflicts with main"
+
+## Priority
+- Prefer the PR's changes when semantically equivalent
+- If main has breaking changes, adapt PR code to new API
+- When in doubt, keep both changes if they're additive
+```
+
+**Step 4: Integration with Keepalive Loop**
+Add to `agents-keepalive-loop.yml`:
+```yaml
+- name: Check for conflicts
+  id: conflict-check
+  uses: ./.github/actions/conflict-detector
+  with:
+    pr_number: ${{ inputs.pr_number }}
+
+- name: Use conflict prompt if needed
+  if: steps.conflict-check.outputs.has_conflict == 'true'
+  run: |
+    echo "prompt_override=fix_merge_conflicts.md" >> $GITHUB_OUTPUT
+```
+
+**Step 5: Metrics & Logging**
+- Track: conflicts detected, conflicts resolved, resolution time
+- Log: conflict files, resolution commits, manual escalations
+
+**Implementation Checklist:**
+- [ ] Create `scripts/conflict_detector.js`
+- [ ] Add conflict detection to `keepalive_gate.js`
+- [ ] Create `.github/codex/prompts/fix_merge_conflicts.md`
+- [ ] Update `agents-keepalive-loop.yml` to use conflict prompt
+- [ ] Add conflict metrics to keepalive summary
+- [ ] Create `error_classifier.js` enhancements for conflict patterns
+- [ ] Test with intentionally conflicted branches on Manager-Database
+
+### 4E. Verification-to-Issue Workflow
+
+**Problem:** When `verify:evaluate` or `verify:compare` identifies issues, there's no automated way to create follow-up work.
+
+**Note:** We previously disabled automatic issue creation because it was too aggressive. This is a **user-triggered** alternative.
+
+**Proposed Label:** `verify:create-issue`
+
+**Flow:**
+
+```
+User reviews verify comment on merged PR
+          ↓
+User adds `verify:create-issue` label
+          ↓
+Workflow extracts:
+  - CONCERNS from evaluation report
+  - Specific scores <7/10
+  - Unique insights from comparison
+          ↓
+Creates new issue:
+  - Title: "[Follow-up] {concern summary} from PR #{number}"
+  - Body: Structured with original PR link, specific concerns, suggested tasks
+  - Labels: `agents:optimize` (ready for agent formatting)
+          ↓
+Posts comment on PR linking to new issue
+```
+
+**Full Implementation:**
+
+**Create `agents-verify-to-issue.yml`:**
+```yaml
+name: Create Issue from Verification
+
+on:
+  pull_request:
+    types: [labeled]
+
+jobs:
+  create-issue:
+    if: github.event.label.name == 'verify:create-issue'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Find verification comment
+        id: find-comment
+        uses: peter-evans/find-comment@v3
+        with:
+          issue-number: ${{ github.event.pull_request.number }}
+          body-includes: "## PR Verification Report"
+      
+      - name: Extract concerns
+        id: extract
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const comment = ${{ toJSON(steps.find-comment.outputs.comment-body) }} || '';
+            // Parse CONCERNS section
+            const concernsMatch = comment.match(/### Concerns\n([\s\S]*?)(?=###|$)/);
+            const concerns = concernsMatch ? concernsMatch[1].trim() : 'No specific concerns found';
+            
+            // Parse low scores
+            const scoreMatches = [...comment.matchAll(/(\w+):\s*(\d+)\/10/g)];
+            const lowScores = scoreMatches
+              .filter(m => parseInt(m[2]) < 7)
+              .map(m => `${m[1]}: ${m[2]}/10`);
+            
+            core.setOutput('concerns', concerns);
+            core.setOutput('low_scores', lowScores.join(', ') || 'None below 7/10');
+      
+      - name: Create follow-up issue
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const prNumber = context.payload.pull_request.number;
+            const prTitle = context.payload.pull_request.title;
+            const concerns = ${{ toJSON(steps.extract.outputs.concerns) }};
+            const lowScores = ${{ toJSON(steps.extract.outputs.low_scores) }};
+            
+            const issueBody = `## Follow-up from PR #${prNumber}
+            
+            **Original PR:** #${prNumber} - ${prTitle}
+
+            ## Concerns Identified
+
+            ${concerns}
+
+            ## Scores Below Threshold
+
+            ${lowScores}
+
+            ## Suggested Tasks
+
+            - [ ] Address the concerns listed above
+            - [ ] Update tests if needed
+            - [ ] Re-verify after changes
+
+            ---
+            *This issue was automatically created from verification feedback. Add \`agents:optimize\` to refine.*`;
+
+            const issue = await github.rest.issues.create({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              title: `[Follow-up] Address verification concerns from PR #${prNumber}`,
+              body: issueBody,
+              labels: ['agents:optimize', 'follow-up']
+            });
+            
+            // Comment on original PR
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber,
+              body: `📋 Follow-up issue created: #${issue.data.number}`
+            });
+            
+            // Remove the trigger label
+            await github.rest.issues.removeLabel({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber,
+              name: 'verify:create-issue'
+            });
+```
+
+**Implementation Checklist:**
+- [ ] Create `agents-verify-to-issue.yml` workflow
+- [ ] Add `verify:create-issue` label to sync config
+- [ ] Add `follow-up` label to sync config
+- [ ] Test on Travel-Plan-Permission or Manager-Database
+- [ ] Add to sync manifest for consumer repos
+
+---
+
+## Phase 4 Testing Plan
+
+**Test Repository:** Manager-Database  
+**Test Duration:** 2 weeks
+
+### Test 4A: Label Cleanup
+
+1. Count labels before cleanup
+2. Run cleanup script
+3. Verify functional labels still work
+4. Confirm bloat labels removed
+
+### Test 4B: User Guide
+
+1. Create guide document
+2. Test each documented flow
+3. Gather feedback on clarity
+
+### Test 4C: Auto-Pilot (High Risk - Careful Testing)
+
+**Test Issue Ideas:**
+- Simple refactoring task (low risk)
+- Bug fix with clear acceptance criteria
+- NOT: Large features or infrastructure changes
+
+**Success Criteria:**
+- Issue → Merged PR in <2 hours (for simple tasks)
+- No runaway automation
+- Clear progress visibility
+- Graceful failure handling
+
+### Test 4D: Conflict Resolution
+
+1. Create PR with intentional conflict
+2. Verify conflict detection triggers
+3. Confirm agent resolves conflict
+4. Measure cycle efficiency improvement
+
+### Test 4E: Verify-to-Issue
+
+1. Use existing verify:evaluate results
+2. Add verify:create-issue label
+3. Confirm issue created with proper context
+4. Verify issue is agent-ready
+
+---
+
+## Additional Automation Opportunities (Phase 5+)
+
+### 5A. Auto-labeling on PR Creation ✅ READY
+
+**Status:** Script exists (`label_matcher.py`), workflow integration needed.
+
+**Script location:** `scripts/langchain/label_matcher.py`
+- Uses semantic embeddings to match issue/PR content to labels
+- Configurable confidence threshold (default 80%)
+- Already has tests in place
+
+**Implementation plan:**
+- [ ] Create `agents-auto-label.yml` workflow
+- [ ] Trigger on PR opened
+- [ ] Call `label_matcher.py` with PR title + body + changed files
+- [ ] Apply labels at >90% confidence OR post comment with suggestions
+
+### 5B. Coverage Regression Auto-Issue ✅ EXISTS
+
+**Status:** Already implemented in `maint-coverage-guard.yml`!
+
+**Current behavior:**
+- Runs daily on schedule
+- Compares current coverage to `config/coverage-baseline.json`
+- Creates/updates issue titled "[coverage] baseline breach" when below threshold
+- Labels with `coverage`
+
+**Suggested enhancement:** Add soft check to PRs (warn, don't fail)
+- [ ] Add optional PR check that posts coverage delta as comment
+- [ ] Warning only - does not block merge or automation
+- [ ] Shows trend: "Coverage changed: 82% → 79% (-3%)"
+
+### 5C. Stale PR Cleanup ❌ NOT NEEDED
+
+**Decision:** Not an issue in these repos currently. Skip.
+
+### 5D. Dependency Update Automation ⚠️ PARTIAL
+
+**Current state:**
+- `maint-dependabot-auto-label.yml` - Adds `agents:allow-change` label to dependabot PRs
+- `maint-dependabot-auto-lock.yml` - Regenerates requirements.lock for pyproject.toml changes
+
+**Missing:**
+- Auto-merge when CI passes
+- Currently dependabot PRs require manual merge
+
+**Implementation plan:**
+- [ ] Add auto-merge step to dependabot workflow
+- [ ] Condition: CI green + no security alerts + minor/patch version only
+- [ ] Skip for major version bumps (require human review)
+
+### 5E. Issue Template Enforcement (Soft Warning)
+
+**Approach:** Warn, don't block.
+
+**Implementation plan:**
+- [ ] Create `agents-issue-lint.yml` workflow
+- [ ] Trigger on issue opened/edited
+- [ ] Check for AGENT_ISSUE_TEMPLATE sections (Why, Scope, Tasks, Acceptance)
+- [ ] If missing sections:
+  - Post friendly comment suggesting `agents:format` label
+  - Add `needs-formatting` label
+  - Do NOT close or block the issue
+
+### 5F. Cross-Repo Issue Linking ❌ SKIPPED
+
+**Decision:** Not implementing. Complexity outweighs benefit for current repo scale.
+
+### 5G. Agent Performance Dashboard (LangSmith + Custom)
+
+**Strategy:** Use LangSmith for LLM operations, custom GitHub metrics for workflow stats.
+
+**LangSmith Integration (Recommended for LLM Metrics):**
+
+LangSmith provides out-of-the-box tracking for:
+- Token usage per operation (prompt + completion)
+- Latency by provider/model
+- Success/failure rate by prompt
+- Cost tracking per provider
+- Trace visualization for debugging
+
+**Implementation:**
+```python
+# In llm_provider.py - add LangSmith tracing
+import os
+if api_key := os.getenv("LANGSMITH_API_KEY"):  # os.environ rejects None; skip tracing when the key is unset
+    os.environ.update(LANGCHAIN_TRACING_V2="true", LANGCHAIN_API_KEY=api_key,
+                      LANGCHAIN_PROJECT="workflows-agents")
+
+# Traces automatically captured for:
+# - pr_verifier.py evaluations
+# - issue_optimizer.py analysis
+# - issue_formatter.py LLM calls
+# - Any future LangChain operations
+```
+
+**Custom Workflow Metrics (GitHub-based):**
+
+| Metric | Source | Collection Method |
+|--------|--------|-------------------|
+| Issue created → PR merged time | Issue/PR timestamps | GitHub API query |
+| Keepalive cycles per PR | Workflow run count | Count runs per PR |
+| Agent success rate | PR merge status | Merged vs closed without merge |
+| Autofix effectiveness | Commits per PR | Count autofix commits |
+| CI pass rate first try | Gate workflow | First run success % |
+| Conflict resolution time | Conflict detect → resolve | Timestamp diff |
+
+**Implementation Checklist:**
+- [ ] Add `LANGSMITH_API_KEY` secret to Workflows repo
+- [ ] Update `tools/llm_provider.py` with tracing env vars
+- [ ] Create LangSmith project "workflows-agents"
+- [ ] Create `scripts/agent_metrics.py` for GitHub API stats
+- [ ] Add `maint-agent-metrics.yml` weekly workflow
+- [ ] Output: Post summary to wiki or store in repo
+
+**Dashboard Views:**
+1. **LangSmith Dashboard:** Token usage, latency, errors by model/prompt
+2. **GitHub Actions Insights:** Workflow run times, success rates
+3. **Custom Metrics Report:** Weekly summary posted to wiki/README
+
+---
+
+## Implementation Priority
+
+| Initiative | Effort | Value | Priority | Notes |
+|------------|--------|-------|----------|-------|
+| 4A. Label Cleanup | Low | Medium | ✅ Ready | 5 bloat labels + per-repo audit |
+| 4B. User Guide | Medium | High | Defer | After other features stable |
+| 4C. Auto-Pilot | High | High | Test carefully | Most complex |
+| 4D. Conflict Resolution | Medium | High | ✅ Ready | Full implementation planned |
+| 4E. Verify-to-Issue | Low | Medium | ✅ Ready | Full workflow designed |
+| 5A. Auto-labeling | Low | Medium | ✅ Ready | Script exists |
+| 5B. Coverage PR Check | Low | Medium | ✅ Ready | Soft warning only |
+| 5D. Dependabot Auto-merge | Low | Medium | ✅ Ready | Extend existing |
+| 5E. Issue Lint | Low | Low | Later | Nice to have |
+| 5F. Cross-Repo Linking | - | - | ❌ Skipped | Not needed |
+| 5G. Metrics Dashboard | Medium | Medium | ✅ Ready | LangSmith + custom |
 
 ### Test Results Documentation
 Full substantive analysis available at `/tmp/substantive_test_analysis.md`:
diff --git a/templates/consumer-repo/README.md b/templates/consumer-repo/README.md
index 0b2afddd1..1404e4f08 100644
--- a/templates/consumer-repo/README.md
+++ b/templates/consumer-repo/README.md
@@ -187,7 +187,7 @@ The keepalive system uses PR labels for routing and control:
 ### Control Labels
 | Label | Effect |
 |-------|--------|
-| `agents:pause` | Halts all agent activity on PR |
+| `agents:paused` | Halts all agent activity on PR |
 | `agents:max-parallel:N` | Overrides concurrent run limit (default: 1) |
 | `needs-human` | Auto-added after repeated failures, blocks keepalive |
 
@@ -208,7 +208,7 @@ Keepalive dispatches an agent only when **ALL** conditions are met:
 2. Gate workflow completed successfully
 3. PR body contains unchecked tasks in Automated Status Summary
 4. Not at concurrency limit (default: 1 concurrent run per PR)
-5. No `agents:pause` or `needs-human` labels present
+5. No `agents:paused` or `needs-human` labels present
 
 ### Progress Tracking
 - Agent updates checkboxes in PR body after completing tasks
@@ -222,8 +222,8 @@ After 3 consecutive failures:
 3. Fix issues, then remove `needs-human` label to resume
 
 ### Manual Control
-- **Pause**: Add `agents:pause` label
-- **Resume**: Remove `agents:pause` or `needs-human` label
+- **Pause**: Add `agents:paused` label
+- **Resume**: Remove `agents:paused` or `needs-human` label
 - **Restart**: Remove and re-add the `agent:*` label
 - **Force retry**: Use workflow_dispatch with PR number
 
@@ -302,7 +302,7 @@ When using agent workflows, the recommended flow is:
 - Check PR has `agent:*` label
 - Verify Gate workflow passed
 - Ensure PR body has Automated Status Summary with unchecked tasks
-- Check for `agents:pause` or `needs-human` labels
+- Check for `agents:paused` or `needs-human` labels
 - Review keepalive summary comment for skip reasons
 
 **2. No Automated Status Summary**