Merged
34 changes: 30 additions & 4 deletions .github/workflows/maint-52-sync-dev-versions.yml
@@ -45,8 +45,34 @@ env:
stranske/Trend_Model_Project

jobs:
  # CRITICAL: Verify versions are current BEFORE syncing to consumer repos
  verify-versions-current:
    name: Verify versions are current
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Workflows
        uses: actions/checkout@v4
        with:
          sparse-checkout: |
            .github/workflows/autofix-versions.env
            scripts/update_versions_from_pypi.py
          sparse-checkout-cone-mode: false

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Verify versions against PyPI
        run: |
          echo "🔍 Checking that all versions in autofix-versions.env are current..."
          python scripts/update_versions_from_pypi.py --check --fail-on-outdated
          echo ""
          echo "✅ All versions are current - safe to sync to consumer repos!"

  prepare:
    name: Prepare version sync
    needs: verify-versions-current  # Don't sync until we verify versions are current!
    runs-on: ubuntu-latest
    outputs:
      repos: ${{ steps.repos.outputs.matrix }}
@@ -122,7 +148,7 @@ jobs:
        run: |
          if [ -f "consumer/pyproject.toml" ]; then
            echo "has_pyproject=true" >> "$GITHUB_OUTPUT"

            # Check if it has dev dependencies
            if grep -q '\[project.optional-dependencies\]' consumer/pyproject.toml; then
              if grep -qE '^dev\s*=' consumer/pyproject.toml; then
@@ -154,7 +180,7 @@ jobs:
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
          else
            echo "has_changes=true" >> "$GITHUB_OUTPUT"

            # Apply if not dry run
            if [ "${{ inputs.dry_run }}" != "true" ]; then
              python ../scripts/sync_dev_dependencies.py --apply --use-minimum-pins
@@ -176,7 +202,7 @@ jobs:
          cd consumer

          echo "Adding dev dependencies section to pyproject.toml..."

          # Use --create-if-missing to add dev deps
          if python ../scripts/sync_dev_dependencies.py --apply --use-minimum-pins --create-if-missing 2>&1 | tee /tmp/sync_output.txt; then
            if grep -q "version updates" /tmp/sync_output.txt; then
@@ -223,7 +249,7 @@ jobs:
          # Add lockfile if it exists and was modified
          git add pyproject.toml .github/workflows/autofix-versions.env
          if [ -f requirements.lock ]; then git add requirements.lock; fi

          # Commit with multi-line message
          commit_msg="deps: sync dev tool versions from Workflows

125 changes: 125 additions & 0 deletions .github/workflows/maint-auto-update-pypi-versions.yml
@@ -0,0 +1,125 @@
# Auto-update dev tool versions from PyPI
#
# This workflow ensures autofix-versions.env stays current with PyPI releases.
# It runs daily and creates a PR if any versions are outdated.
#
# CRITICAL: This workflow MUST run before maint-52-sync-dev-versions.yml
# to ensure we never ship stale versions to consumer repos.

name: Maint Auto-Update PyPI Versions

on:
  schedule:
    # Daily at 03:00 UTC (before the weekly sync at 05:00)
    - cron: '0 3 * * *'
  workflow_dispatch:
    inputs:
      dry_run:
        description: 'Preview changes without creating PR'
        type: boolean
        default: false

permissions:
  contents: write
  pull-requests: write

jobs:
  check-and-update:
    name: Check PyPI for updates
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Workflows
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Check for outdated versions
        id: check
        run: |
          echo "🔍 Checking PyPI for latest versions..."
          python scripts/update_versions_from_pypi.py --check 2>&1 | tee /tmp/check_output.txt
          # Script exits 0 even for outdated (use --fail-on-outdated for non-zero)
          # Check output directly for "outdated" to determine if updates are needed
          if grep -q "outdated" /tmp/check_output.txt; then
            echo "has_updates=true" >> "$GITHUB_OUTPUT"
          else
            echo "has_updates=false" >> "$GITHUB_OUTPUT"
          fi
          cat /tmp/check_output.txt

      - name: Update versions
        id: update
        if: steps.check.outputs.has_updates == 'true' && inputs.dry_run != true
        run: |
          echo "📦 Updating autofix-versions.env with latest PyPI versions..."
          python scripts/update_versions_from_pypi.py --apply 2>&1 | tee /tmp/update_output.txt

          # Extract update summary for PR body
          {
            echo "summary<<EOF"
            cat /tmp/update_output.txt
            echo "EOF"
          } >> "$GITHUB_OUTPUT"

      - name: Create PR
        if: steps.check.outputs.has_updates == 'true' && inputs.dry_run != true
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Configure git
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

          # Check if there are actual changes
          if git diff --quiet .github/workflows/autofix-versions.env; then
            echo "No changes to commit"
            exit 0
          fi
          # Create branch
          branch="auto/update-pypi-versions-$(date +%Y%m%d)"
          git checkout -b "$branch"

          # Commit changes
          git add .github/workflows/autofix-versions.env
          git commit -m "chore: update dev tool versions from PyPI

          Auto-generated by maint-auto-update-pypi-versions workflow.

          ${{ steps.update.outputs.summary }}"

          # Push and create PR
          git push origin "$branch"

          gh pr create \
            --title "chore: update dev tool versions from PyPI" \
            --body "## Summary

          This PR updates the pinned dev tool versions in \`autofix-versions.env\` to match the latest releases on PyPI.

          ### Changes
          \`\`\`
          ${{ steps.update.outputs.summary }}
          \`\`\`

          ### Why this matters
          Keeping dev tool versions current ensures:
          - Consumer repos receive the latest bug fixes and features
          - We don't ship known-vulnerable or outdated tooling
          - Version drift between repos is minimized

          ---
          *Auto-generated by the [maint-auto-update-pypi-versions](.github/workflows/maint-auto-update-pypi-versions.yml) workflow*" \
            --label "dependencies" \
            --label "automation"

      - name: Dry run summary
        if: inputs.dry_run == true
        run: |
          echo "🔍 Dry run - would have made these updates:"
          python scripts/update_versions_from_pypi.py --check
1 change: 1 addition & 0 deletions docs/ci/WORKFLOWS.md
@@ -105,6 +105,7 @@ The gate uses the shared `.github/scripts/detect-changes.js` helper to decide wh
* [`maint-sync-env-from-pyproject.yml`](../../.github/workflows/maint-sync-env-from-pyproject.yml) syncs dev tool version pins from `pyproject.toml` to `autofix-versions.env` after Dependabot updates land.
* [`maint-52-validate-workflows.yml`](../../.github/workflows/maint-52-validate-workflows.yml) dry-parses every workflow with `yq`, runs `actionlint` with the repository allowlist, and fails fast when malformed YAML or unapproved actionlint findings slip in.
* [`maint-52-sync-dev-versions.yml`](../../.github/workflows/maint-52-sync-dev-versions.yml) syncs dev tool versions (ruff, mypy, black, isort, pytest) from `autofix-versions.env` to consumer repository `pyproject.toml` files weekly or on version changes.
* [`maint-auto-update-pypi-versions.yml`](../../.github/workflows/maint-auto-update-pypi-versions.yml) checks PyPI daily for latest dev tool versions and creates a PR to update `autofix-versions.env` when versions are outdated.
* [`maint-62-integration-consumer.yml`](../../.github/workflows/maint-62-integration-consumer.yml) runs daily at 05:05 UTC, on release publication, or by manual dispatch to execute the integration-repo scenarios via the reusable Python CI template and keep the integration failure issue updated.
* [`maint-63-ensure-environments.yml`](../../.github/workflows/maint-63-ensure-environments.yml) ensures agent environments (`agent-standard`, `agent-high-privilege`) exist with appropriate protection rules for environment-gated workflows.
* [`maint-65-sync-label-docs.yml`](../../.github/workflows/maint-65-sync-label-docs.yml) synchronizes `docs/LABELS.md` to consumer repositories weekly (Sundays 00:00 UTC) or via manual dispatch.
5 changes: 5 additions & 0 deletions docs/ci/WORKFLOW_SYSTEM.md
@@ -537,6 +537,10 @@ Keep this table handy when you are triaging automation: it confirms which workfl
syncs dev tool versions (ruff, mypy, black, isort, pytest) from
`autofix-versions.env` to consumer repository `pyproject.toml` files
weekly or when version changes are detected.
- **Maint Auto-Update PyPI Versions** – `.github/workflows/maint-auto-update-pypi-versions.yml`
checks PyPI daily (03:00 UTC) for latest dev tool versions and creates a PR
to update `autofix-versions.env` when versions are outdated, ensuring the
sync workflow never ships stale versions to consumer repos.
- **Maint 62 Integration Consumer** – `.github/workflows/maint-62-integration-consumer.yml`
exercises the reusable Python CI template against the `templates/integration-repo`
scenarios on a daily schedule (05:05 UTC), on release publication, or via
@@ -666,6 +670,7 @@ Keep this table handy when you are triaging automation: it confirms which workfl
| **Maint Sync versions.env from pyproject.toml** (`maint-sync-env-from-pyproject.yml`, maintenance bucket) | `push` (`main`, `pyproject.toml`), `workflow_dispatch` | Sync dev tool version pins from `pyproject.toml` into `autofix-versions.env` after changes land. | ⚪ Automatic on main | [Maint sync env runs](https://github.com/stranske/Workflows/actions/workflows/maint-sync-env-from-pyproject.yml) |
| **Maint 52 Validate Workflows** (`maint-52-validate-workflows.yml`, maintenance bucket) | `pull_request`, `push` (`main`) | Parse every workflow file with `yq`, honour the Actionlint allowlist, and fail fast when syntax errors or lint violations appear. | ⚪ Automatic on PR/main | [Maint 52 workflow validations](https://github.com/stranske/Trend_Model_Project/actions/workflows/maint-52-validate-workflows.yml) |
| **Maint 52 Sync Dev Versions** (`maint-52-sync-dev-versions.yml`, maintenance bucket) | `schedule` (Sundays 01:00 UTC), `push` (`autofix-versions.env`), `workflow_dispatch` | Sync dev tool versions from `autofix-versions.env` to consumer repository `pyproject.toml` files. | ⚪ Scheduled/manual | [Sync dev versions runs](https://github.com/stranske/Workflows/actions/workflows/maint-52-sync-dev-versions.yml) |
| **Maint Auto-Update PyPI Versions** (`maint-auto-update-pypi-versions.yml`, maintenance bucket) | `schedule` (daily 03:00 UTC), `workflow_dispatch` | Check PyPI for latest dev tool versions and create a PR to update `autofix-versions.env` when versions are outdated. | ⚪ Scheduled | [Auto-update PyPI versions runs](https://github.com/stranske/Workflows/actions/workflows/maint-auto-update-pypi-versions.yml) |
| **Maint Coverage Guard** (`maint-coverage-guard.yml`, maintenance bucket) | `schedule` (`45 6 * * *`), `workflow_dispatch` | Audit the latest Gate coverage trend artifact and compare it against the baseline, failing when coverage regresses beyond the guard thresholds. | ⚪ Scheduled | [Maint Coverage Guard runs](https://github.com/stranske/Trend_Model_Project/actions/workflows/maint-coverage-guard.yml) |
| **Maint 46 Post CI** (`maint-46-post-ci.yml`, maintenance bucket) | `workflow_run` (Gate, `completed`) | Recovery-only: inspect the Gate run for a missing or failed `summary` job; when recovery is needed, collect the Gate artifacts, render the consolidated CI summary with coverage deltas, publish a markdown preview, and refresh the Gate commit status. Otherwise exit immediately. | ⚪ Automatic follow-up | [Maint 46 runs](https://github.com/stranske/Trend_Model_Project/actions/workflows/maint-46-post-ci.yml) |
| **Maint 45 Cosmetic Repair** (`maint-45-cosmetic-repair.yml`, maintenance bucket) | `workflow_dispatch` | Run pytest + fixers manually and open a labelled PR when changes are required. | ⚪ Manual | [Maint 45 manual entry](https://github.com/stranske/Trend_Model_Project/actions/workflows/maint-45-cosmetic-repair.yml) |
87 changes: 80 additions & 7 deletions docs/plans/langchain-issue-intake-proposal.md
@@ -391,17 +391,90 @@ This keeps the complexity low while allowing natural interaction.
**Plausibility**: ⭐⭐⭐ MEDIUM-HIGH
**Scope**: ~2-3 days (data collection), ongoing refinement

### 6. Duplicate/Related Issue Detection
### 6. Duplicate/Related Issue Detection (Semantic Matching Upgrade)

**Use Case**: Before creating new issue, check if similar work exists.
**Use Case**: Before creating a new issue, check whether similar work already exists. Also upgrade label matching from Levenshtein distance to semantic similarity.

**Approach**:
- Embed issue description, compare to existing open issues
- Warn if high similarity detected
- Link related issues for context
**The Problem (Current State)**:

*Issue Deduplication:*
- Existing dedup logic uses exact title matching or Levenshtein distance
- Levenshtein is good for typos ("fix bug" vs "fxi bug") but bad at semantic similarity
- "Add unit tests for portfolio module" and "Write test coverage for portfolio.py" are the same intent but have low Levenshtein similarity
- Result: False negatives (duplicate issues created) and false positives (unrelated issues flagged)

*Label Matching (in `agents-63-issue-intake.yml` lines 601-634):*
- Current implementation uses Levenshtein distance to find similar labels
- Works for typos: `bugfix` → matches `bug` ✅
- Fails for synonyms: `defect` → doesn't match `bug` ❌
- "enhancement", "feature", "improvement" are semantically equivalent but have no character similarity
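
The gap is easy to demonstrate with a plain edit-distance helper — an illustrative pure-Python sketch (the matcher in `agents-63-issue-intake.yml` itself is JavaScript, so these names are not from the workflow):

```python
def levenshtein(a: str, b: str) -> int:
    """Edit distance via the classic dynamic-programming recurrence."""
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1,                # deletion
                           cur[j - 1] + 1,             # insertion
                           prev[j - 1] + (ca != cb)))  # substitution
        prev = cur
    return prev[-1]

def similarity(a: str, b: str) -> float:
    """Normalised similarity in [0, 1]; 1.0 means identical strings."""
    return 1 - levenshtein(a, b) / max(len(a), len(b), 1)

print(similarity("bugfix", "bug"))   # 0.5 — typo/prefix case: usable signal
print(similarity("defect", "bug"))   # 0.0 — synonyms that share no characters
```

The second pair is the failure mode above: character-level similarity bottoms out exactly where semantic matching is needed.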

**LangChain Solution**:
- **Embeddings-based similarity** catches "same idea, different phrasing"
- Uses vector stores (FAISS, Chroma) for efficient similarity search
- Semantic distance measures conceptual similarity, not character edits
- **Same infrastructure serves both use cases** (issues AND labels)
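
Under the hood, "semantic distance" is typically cosine similarity between embedding vectors. A toy sketch with made-up 3-dimensional vectors (real embeddings such as `text-embedding-3-small` have 1536 dimensions; the vectors here are illustrative, not model output):

```python
import math

def cosine(u: list[float], v: list[float]) -> float:
    """Cosine similarity: 1.0 for parallel vectors, ~0 for unrelated ones."""
    dot = sum(a * b for a, b in zip(u, v))
    norm = math.sqrt(sum(a * a for a in u)) * math.sqrt(sum(b * b for b in v))
    return dot / norm

# Hypothetical vectors: a real model places synonyms close together.
bug = [0.9, 0.1, 0.0]
defect = [0.85, 0.2, 0.05]  # near-synonym -> nearly parallel
docs = [0.0, 0.1, 0.95]     # unrelated concept -> nearly orthogonal

print(round(cosine(bug, defect), 2))  # high, close to 1
print(round(cosine(bug, docs), 2))    # low, close to 0
```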

**Technical Approach - Issue Deduplication**:
```python
import os

from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Generate embeddings for issue descriptions via the GitHub Models endpoint
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    base_url="https://models.inference.ai.azure.com",
    api_key=os.environ["GITHUB_TOKEN"],
)

# Build vector store from existing open issues
issue_texts = [f"{issue.title}\n{issue.body}" for issue in open_issues]
vector_store = FAISS.from_texts(issue_texts, embeddings, metadatas=[{"number": i.number} for i in open_issues])

# Search for similar issues. With FAISS, similarity_search_with_score
# returns L2 distances: LOWER scores mean MORE similar, so duplicates
# are the hits at or below a distance threshold.
similar = vector_store.similarity_search_with_score(new_issue_text, k=5)
duplicates = [(doc.metadata["number"], score) for doc, score in similar if score <= THRESHOLD]
```

**Technical Approach - Label Matching**:
```python
# Build vector store from existing repo labels
label_names = [label.name for label in repo_labels]
label_store = FAISS.from_texts(label_names, embeddings, metadatas=[{"name": l.name} for l in repo_labels])

# Match user-specified label to existing labels
def find_semantic_label_match(user_label: str, threshold: float = 0.8) -> str | None:
    """Find a semantically similar existing label, or None."""
    # Use relevance scores, which are normalised to [0, 1] with higher =
    # more similar; raw similarity_search_with_score values are L2
    # distances, where a >= threshold test would be inverted.
    results = label_store.similarity_search_with_relevance_scores(user_label, k=1)
    if results and results[0][1] >= threshold:
        return results[0][0].metadata["name"]
    return None

# Examples:
# find_semantic_label_match("defect") → "bug"
# find_semantic_label_match("improvement") → "enhancement"
# find_semantic_label_match("testing") → "tests"
```

**Advantages over Levenshtein**:
| Aspect | Levenshtein | Semantic Embeddings |
|--------|-------------|---------------------|
| "Same typo" detection | ✅ Excellent | ✅ Good |
| "Same idea, different words" | ❌ Poor | ✅ Excellent |
| Performance at scale | ⚠️ O(len(a)·len(b)) per pair, linear scan over all issues | ✅ Sub-linear lookups with an ANN index |
| False positives | High (similar chars ≠ similar meaning) | Low |
| False negatives | High (different chars = missed duplicates) | Low |

**Integration Points**:
1. **Issue deduplication**: Run during `agents-63-issue-intake.yml` before bridge creation
- Post advisory comment with similar issues (doesn't block creation)
- Link related issues for context
2. **Label matching**: Replace Levenshtein in `findMatchingLabel()` function
- Same embeddings model, different vector store
- Cache label embeddings (labels change rarely)
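
The label-embedding cache could be as simple as a JSON file keyed by label name. A sketch under stated assumptions: `embed` is an injected callable (e.g. wrapping the embeddings client) and the cache path is hypothetical, not an existing convention in the repo:

```python
import json
from pathlib import Path
from typing import Callable

def load_label_embeddings(
    labels: list[str],
    embed: Callable[[str], list[float]],
    cache_path: Path = Path(".cache/label_embeddings.json"),
) -> dict[str, list[float]]:
    """Return {label: vector}, calling embed() only for uncached labels."""
    cache: dict[str, list[float]] = {}
    if cache_path.exists():
        cache = json.loads(cache_path.read_text())
    missing = [name for name in labels if name not in cache]
    for name in missing:  # labels change rarely, so this loop is usually empty
        cache[name] = embed(name)
    if missing:
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        cache_path.write_text(json.dumps(cache))
    return {name: cache[name] for name in labels}
```

On the second and later runs only renamed or newly created labels trigger an API call; everything else is served from disk.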

**Plausibility**: ⭐⭐⭐⭐ HIGH (embeddings are well-understood)
**Scope**: ~2 days
**Scope**: ~2-3 days (expanded to include label matching)

### 7. Automatic Task Decomposition
