diff --git a/.github/workflows/backlog-index-integrity.yml b/.github/workflows/backlog-index-integrity.yml
new file mode 100644
index 00000000..78f24530
--- /dev/null
+++ b/.github/workflows/backlog-index-integrity.yml
@@ -0,0 +1,183 @@
+name: backlog-index-integrity
+
+# Enforces that `docs/BACKLOG.md` (the generated index) stays in
+# sync with the per-row files under `docs/backlog/P[0-3]/B-*.md`.
+# When a row file is added/modified/removed, the index must be
+# regenerated via `tools/backlog/generate-index.sh` in the same
+# commit (or PR) so a fresh reader sees the same row set in both
+# places.
+#
+# Phase 1c per the BACKLOG-per-row-file ADR
+# (docs/DECISIONS/2026-04-22-backlog-per-row-file-restructure.md
+# §"Existing substrate Phase 1a prior work" → Phase 1c OWED).
+# This workflow IS the lint-index gate; it wraps
+# `generate-index.sh --check` rather than introducing a separate
+# `tools/backlog/lint-index.sh` wrapper, since there is no
+# pre-commit-hook framework currently wired up in this repo —
+# the CI surface is the equivalent enforcement point.
+#
+# Note: until Phase 2 bulk-migration runs, `docs/BACKLOG.md` is
+# still the monolithic authoritative file. Phase 2 will overwrite
+# it with the generated index. This workflow becomes load-bearing
+# at that point. Until then it primarily checks that the per-row
+# files themselves are well-formed (B-0001 example, B-0002
+# Otto-287 Noether) and skips the equivalence check.
+#
+# Safe-pattern compliance (mirrors memory-index-integrity.yml):
+# - SHA-pinned action versions (actions/checkout@de0fac2...)
+# - Explicit `permissions:` minimum
+# - Only first-party trusted context (github.ref, in the
+#   concurrency group) — no user-authored text is referenced.
+# - Concurrency group + cancel-in-progress: false.
+# - runs-on: ubuntu-24.04 pinned.
+
+on:
+  pull_request:
+    paths:
+      - "docs/backlog/**"
+      - "docs/BACKLOG.md"
+      - "tools/backlog/**"
+  push:
+    branches: [main]
+    paths:
+      - "docs/backlog/**"
+      - "docs/BACKLOG.md"
+      - "tools/backlog/**"
+  workflow_dispatch: {}
+
+permissions:
+  contents: read
+
+concurrency:
+  group: backlog-index-integrity-${{ github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  check:
+    name: check docs/BACKLOG.md generated-index drift
+    runs-on: ubuntu-24.04
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 1
+
+      - name: verify per-row + index parity
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          # Existence preconditions — fail fast (chatgpt-codex P1 and
+          # copilot P1 review on PR #492). A missing docs/BACKLOG.md
+          # or missing generator must NOT silently fall into
+          # pre-Phase-2 mode and exit 0; that would let an accidental
+          # delete of the authoritative backlog ship green.
+          if [ ! -f docs/BACKLOG.md ]; then
+            echo "ERROR: docs/BACKLOG.md is missing." >&2
+            echo "This file is the authoritative backlog (pre-Phase-2)" >&2
+            echo "or the generated index (Phase 2+). Either way it" >&2
+            echo "must exist. Restore it or revert the deletion." >&2
+            exit 1
+          fi
+          if [ ! -x tools/backlog/generate-index.sh ]; then
+            echo "ERROR: tools/backlog/generate-index.sh missing or" >&2
+            echo "non-executable. Phase 1a tooling is required." >&2
+            exit 1
+          fi
+
+          # Pre-Phase-2 sentinel: if docs/BACKLOG.md still looks
+          # monolithic (lacks the "AUTO-GENERATED" header line
+          # emitted by generate-index.sh), the drift check would
+          # fire false-positives on every per-row edit since the
+          # legacy file isn't yet the generated output. In that
+          # case, only verify the per-row files themselves are
+          # well-formed (parseable frontmatter), and skip the
+          # index-equivalence check.
+          #
+          # The header is captured first and grep reads it from a
+          # here-string: under `pipefail`, `head | grep -q` can
+          # report failure if grep exits early and head takes
+          # SIGPIPE, which would wrongly select pre-Phase-2 mode.
+          index_header=$(head -n 5 docs/BACKLOG.md)
+          if ! grep -q "AUTO-GENERATED by tools/backlog/generate-index.sh" <<<"$index_header"; then
+            echo "Phase pre-2 mode: docs/BACKLOG.md is the monolithic" >&2
+            echo "authoritative file; skipping generated-index drift" >&2
+            echo "check. Verifying per-row files instead." >&2
+
+            # Per-row file parseability check (copilot P2 review on
+            # PR #492). The generator is forgiving — a row file with
+            # bad frontmatter would silently produce empty index lines,
+            # not error out. So we explicitly require id+status+title
+            # extraction to succeed for every row file, with non-empty
+            # values, before declaring "parseable".
+            #
+            # Frontmatter-scoped extraction (chatgpt-codex P2 follow-up
+            # review): the awk match is restricted to lines BETWEEN the
+            # opening and closing `---` markers, mirroring
+            # tools/backlog/generate-index.sh's extract_field state
+            # machine. Without this scope, a row with malformed
+            # frontmatter could falsely pass if the body happened to
+            # contain `id:` / `status:` / `title:` text (e.g., in a
+            # code block or example). The frontmatter-scoped match
+            # closes that hole.
+            extract_frontmatter_field() {
+              local file="$1" field="$2"
+              awk -v field="$field" '
+                BEGIN { state = 0 }
+                /^---$/ {
+                  if (state == 0) { state = 1; next }
+                  if (state == 1) { exit }
+                }
+                state == 1 && $0 ~ "^"field":[[:space:]]+" {
+                  sub("^"field":[[:space:]]+", "")
+                  print
+                  exit
+                }
+              ' "$file"
+            }
+
+            # Enumerate row files into a NUL-delimited temp file, not a
+            # process substitution: `< <(find ...)` discards find's
+            # exit status, so a missing or unreadable docs/backlog
+            # tree would be counted as "0 total, 0 malformed" and
+            # pass — the same silent-green hole closed above.
+            row_list=$(mktemp)
+            trap 'rm -f -- "$row_list"' EXIT
+            if ! find docs/backlog -type f -name 'B-*.md' -print0 > "$row_list"; then
+              echo "ERROR: could not enumerate docs/backlog (missing or" >&2
+              echo "unreadable); refusing to report zero malformed rows." >&2
+              exit 1
+            fi
+
+            row_count=0
+            bad_count=0
+            while IFS= read -r -d '' row; do
+              row_count=$((row_count + 1))
+              id=$(extract_frontmatter_field "$row" "id")
+              status=$(extract_frontmatter_field "$row" "status")
+              title=$(extract_frontmatter_field "$row" "title")
+              if [ -z "$id" ] || [ -z "$status" ] || [ -z "$title" ]; then
+                echo " bad: $row (missing id/status/title in frontmatter)" >&2
+                bad_count=$((bad_count + 1))
+              fi
+            done < "$row_list"
+
+            echo " per-row files: $row_count total, $bad_count malformed" >&2
+
+            if [ "$bad_count" -gt 0 ]; then
+              echo "ERROR: $bad_count per-row file(s) have malformed" >&2
+              echo "frontmatter (missing id/status/title). Each row" >&2
+              echo "must have all three fields per Otto-181 schema." >&2
+              exit 1
+            fi
+
+            # Belt-and-suspenders: also exercise the generator end-to-end
+            # to catch any structural issues the field-only check missed.
+            ./tools/backlog/generate-index.sh --stdout > /dev/null
+
+            echo "per-row files parseable + generator runs cleanly" >&2
+            exit 0
+          fi
+
+          # Phase 2+ mode: docs/BACKLOG.md is the generated index;
+          # any drift between it and the per-row tree is a violation.
+          ./tools/backlog/generate-index.sh --check