From 812ed22e4a9a6e16901a39f78d058fd5d5682562 Mon Sep 17 00:00:00 2001 From: Aaron Stainback Date: Sat, 25 Apr 2026 10:48:51 -0400 Subject: [PATCH 1/3] =?UTF-8?q?ci(backlog):=20index-integrity=20workflow?= =?UTF-8?q?=20=E2=80=94=20Phase=201c=20per=20BACKLOG=20ADR?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BACKLOG-per-row-file ADR (PR #474, just merged) committed to a Phase 1c lint that enforces docs/BACKLOG.md ↔ docs/backlog/ per-row parity. The ADR named tools/backlog/lint-index.sh as the proposed shape, but there is no pre-commit-hook framework in this repo — the CI surface is the equivalent enforcement point, so this lands as a workflow rather than a separate wrapper script. Mirrors the structure of memory-index-integrity.yml: SHA-pinned checkout, explicit minimum permissions, concurrency group, no user-authored input touched. **Pre-Phase-2 mode** (current state): docs/BACKLOG.md is still the monolithic authoritative file (~12,800 lines) and is hand-edited. The workflow detects this via the absence of the "AUTO-GENERATED" header line emitted by generate-index.sh, and in that mode only verifies the per-row files themselves are well-formed (parseable by the generator). This guards the existing per-row files (B-0001 example, B-0002 Otto-287 Noether) without false-positive-flagging the legacy monolithic file. **Phase 2+ mode** (after bulk migration): the workflow runs generate-index.sh --check, which exits non-zero on any drift between per-row files and the generated index. Becomes load-bearing at that point. 
Verified locally: - head -5 docs/BACKLOG.md does NOT match AUTO-GENERATED → pre-Phase-2 path taken - generate-index.sh --stdout produces parseable output --- .github/workflows/backlog-index-integrity.yml | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 .github/workflows/backlog-index-integrity.yml diff --git a/.github/workflows/backlog-index-integrity.yml b/.github/workflows/backlog-index-integrity.yml new file mode 100644 index 000000000..a77a24a64 --- /dev/null +++ b/.github/workflows/backlog-index-integrity.yml @@ -0,0 +1,88 @@ +name: backlog-index-integrity + +# Enforces that `docs/BACKLOG.md` (the generated index) stays in +# sync with the per-row files under `docs/backlog/P[0-3]/B-*.md`. +# When a row file is added/modified/removed, the index must be +# regenerated via `tools/backlog/generate-index.sh` in the same +# commit (or PR) so a fresh reader sees the same row set in both +# places. +# +# Phase 1c per the BACKLOG-per-row-file ADR +# (docs/DECISIONS/2026-04-22-backlog-per-row-file-restructure.md +# §"Existing substrate Phase 1a prior work" → Phase 1c OWED). +# This workflow IS the lint-index gate; it wraps +# `generate-index.sh --check` rather than introducing a separate +# `tools/backlog/lint-index.sh` wrapper, since there is no +# pre-commit-hook framework currently wired up in this repo — +# the CI surface is the equivalent enforcement point. +# +# Note: until Phase 2 bulk-migration runs, `docs/BACKLOG.md` is +# still the monolithic authoritative file. Phase 2 will overwrite +# it with the generated index. This workflow becomes load-bearing +# at that point. Until then it primarily verifies that the per-row +# files themselves are well-formed (B-0001 example, B-0002 +# Otto-287 Noether) and skips the equivalence check. +# +# Safe-pattern compliance (mirrors memory-index-integrity.yml): +# - SHA-pinned action versions (actions/checkout@de0fac2...) 
+# - Explicit `permissions:` minimum +# - Only first-party trusted context (github.ref) — no +# user-authored text is referenced. +# - Concurrency group + cancel-in-progress: false. +# - runs-on: ubuntu-24.04 pinned. + +on: + pull_request: + paths: + - "docs/backlog/**" + - "docs/BACKLOG.md" + - "tools/backlog/**" + push: + branches: [main] + paths: + - "docs/backlog/**" + - "docs/BACKLOG.md" + - "tools/backlog/**" + workflow_dispatch: {} + +permissions: + contents: read + +concurrency: + group: backlog-index-integrity-${{ github.ref }} + cancel-in-progress: false + +jobs: + check: + name: check docs/BACKLOG.md generated-index drift + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 1 + + - name: verify per-row + index parity + shell: bash + run: | + set -euo pipefail + + # Pre-Phase-2 sentinel: if docs/BACKLOG.md still looks + # monolithic (lacks the "AUTO-GENERATED" header line + # emitted by generate-index.sh), the drift check would + # fire false-positives on every per-row edit since the + # legacy file isn't yet the generated output. In that + # case, only verify the per-row files themselves are + # well-formed (parseable frontmatter), and skip the + # index-equivalence check. + if ! head -5 docs/BACKLOG.md | grep -q "AUTO-GENERATED by tools/backlog/generate-index.sh"; then + echo "Phase pre-2 mode: docs/BACKLOG.md is the monolithic" >&2 + echo "authoritative file; skipping generated-index drift" >&2 + echo "check. Verifying per-row file parseability instead." >&2 + ./tools/backlog/generate-index.sh --stdout > /dev/null + echo "per-row files parseable" >&2 + exit 0 + fi + + # Phase 2+ mode: docs/BACKLOG.md is the generated index; + # any drift between it and the per-row tree is a violation. 
+ ./tools/backlog/generate-index.sh --check From 339d5e47ed8d8ef8b1a458a9c105dfad647e8577 Mon Sep 17 00:00:00 2001 From: Aaron Stainback Date: Sat, 25 Apr 2026 10:59:03 -0400 Subject: [PATCH 2/3] ci(backlog): fail-fast on missing files + per-row field validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three PR #492 review threads addressed: 1. **chatgpt-codex P1 + copilot P1** — sentinel hides missing docs/BACKLOG.md as pre-Phase-2 mode and exits 0, which would let an accidental delete of the authoritative backlog ship green. Added explicit existence preconditions that fail fast on missing docs/BACKLOG.md OR missing/ non-executable tools/backlog/generate-index.sh. 2. **copilot P2** — pre-Phase-2 parseability proxy (`generate-index.sh --stdout > /dev/null`) is too weak. The generator is forgiving: a row with bad frontmatter produces an empty index line, not an error. Replaced with explicit awk-extraction of id/status/title for every B-*.md file and an empty-value check; any row missing any of the three required fields fails the workflow with a clear per-file message. Belt-and-suspenders: still runs generator end-to-end for structural issues the field-only check might miss. Verified locally with bash: - 2 per-row files enumerated (B-0001 + B-0002) - All 3 required fields extracted cleanly from each - bad_count=0 - generator runs cleanly Net: workflow is now defensive-by-default. Missing files surface as errors immediately; malformed per-row files surface with file-by-file diagnostics. 
--- .github/workflows/backlog-index-integrity.yml | 54 ++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/.github/workflows/backlog-index-integrity.yml b/.github/workflows/backlog-index-integrity.yml index a77a24a64..f7daf9189 100644 --- a/.github/workflows/backlog-index-integrity.yml +++ b/.github/workflows/backlog-index-integrity.yml @@ -66,6 +66,24 @@ jobs: run: | set -euo pipefail + # Existence preconditions — fail fast (chatgpt-codex P1 + + # copilot P1 review on PR #492). A missing docs/BACKLOG.md + # or missing generator must NOT silently fall into + # pre-Phase-2 mode and exit 0; that would let an accidental + # delete of the authoritative backlog ship green. + if [ ! -f docs/BACKLOG.md ]; then + echo "ERROR: docs/BACKLOG.md is missing." >&2 + echo "This file is the authoritative backlog (pre-Phase-2)" >&2 + echo "or the generated index (Phase 2+). Either way it" >&2 + echo "must exist. Restore it or revert the deletion." >&2 + exit 1 + fi + if [ ! -x tools/backlog/generate-index.sh ]; then + echo "ERROR: tools/backlog/generate-index.sh missing or" >&2 + echo "non-executable. Phase 1a tooling is required." >&2 + exit 1 + fi + # Pre-Phase-2 sentinel: if docs/BACKLOG.md still looks # monolithic (lacks the "AUTO-GENERATED" header line # emitted by generate-index.sh), the drift check would @@ -77,9 +95,41 @@ jobs: if ! head -5 docs/BACKLOG.md | grep -q "AUTO-GENERATED by tools/backlog/generate-index.sh"; then echo "Phase pre-2 mode: docs/BACKLOG.md is the monolithic" >&2 echo "authoritative file; skipping generated-index drift" >&2 - echo "check. Verifying per-row file parseability instead." >&2 + echo "check. Verifying per-row files instead." >&2 + + # Per-row file parseability check (copilot P2 review on + # PR #492). The generator is forgiving — a row file with + # bad frontmatter would silently produce empty index lines, + # not error out. 
So we explicitly require id+status+title + # extraction to succeed for every row file, with non-empty + # values, before declaring "parseable". + row_count=0 + bad_count=0 + while IFS= read -r -d '' row; do + row_count=$((row_count + 1)) + id=$(awk '/^id:[[:space:]]+/{sub(/^id:[[:space:]]+/,""); print; exit}' "$row") + status=$(awk '/^status:[[:space:]]+/{sub(/^status:[[:space:]]+/,""); print; exit}' "$row") + title=$(awk '/^title:[[:space:]]+/{sub(/^title:[[:space:]]+/,""); print; exit}' "$row") + if [ -z "$id" ] || [ -z "$status" ] || [ -z "$title" ]; then + echo " bad: $row (missing id/status/title)" >&2 + bad_count=$((bad_count + 1)) + fi + done < <(find docs/backlog -type f -name 'B-*.md' -print0) + + echo " per-row files: $row_count total, $bad_count malformed" >&2 + + if [ "$bad_count" -gt 0 ]; then + echo "ERROR: $bad_count per-row file(s) have malformed" >&2 + echo "frontmatter (missing id/status/title). Each row" >&2 + echo "must have all three fields per Otto-181 schema." >&2 + exit 1 + fi + + # Belt-and-suspenders: also exercise the generator end-to-end + # to catch any structural issues the field-only check missed. ./tools/backlog/generate-index.sh --stdout > /dev/null - echo "per-row files parseable" >&2 + + echo "per-row files parseable + generator runs cleanly" >&2 exit 0 fi From d3fb04cbc47e8ea48f2104b3f0daded046eaeec6 Mon Sep 17 00:00:00 2001 From: Aaron Stainback Date: Sat, 25 Apr 2026 11:09:49 -0400 Subject: [PATCH 3/3] ci(backlog): scope per-row field extraction to YAML frontmatter only Resolves chatgpt-codex P2 review on PR #492. The earlier extraction (`awk '/^id:/'`, etc.) matched anywhere in the file, so a malformed frontmatter could falsely pass if `id:` / `status:` / `title:` happened to appear later in the body (e.g., in a code block, an example, or a discussion of the schema itself). 
Now uses an explicit `extract_frontmatter_field` shell function that mirrors `tools/backlog/generate-index.sh`'s `extract_field` state machine: state=0 before the opening `---`, state=1 inside the frontmatter, exit on the closing `---`. Field match only fires when state==1, so body-text matches cannot mask missing frontmatter. Verified locally with bash: - Both real per-row files (B-0001, B-0002) parse cleanly. - A simulated row with `status: open` only in the body (NOT in frontmatter) correctly returns empty for status, triggering the bad_count failure path. Same gate intent (id+status+title required), now with the hole closed. --- .github/workflows/backlog-index-integrity.yml | 34 ++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/.github/workflows/backlog-index-integrity.yml b/.github/workflows/backlog-index-integrity.yml index f7daf9189..78f24530c 100644 --- a/.github/workflows/backlog-index-integrity.yml +++ b/.github/workflows/backlog-index-integrity.yml @@ -103,15 +103,41 @@ jobs: # not error out. So we explicitly require id+status+title # extraction to succeed for every row file, with non-empty # values, before declaring "parseable". + # + # Frontmatter-scoped extraction (chatgpt-codex P2 follow-up + # review): the awk match is restricted to lines BETWEEN the + # opening and closing `---` markers, mirroring + # tools/backlog/generate-index.sh's extract_field state + # machine. Without this scope, a row with malformed + # frontmatter could falsely pass if the body happened to + # contain `id:` / `status:` / `title:` text (e.g., in a + # code block or example). The frontmatter-scoped match + # closes that hole. 
+ extract_frontmatter_field() { + local file="$1" field="$2" + awk -v field="$field" ' + BEGIN { state = 0 } + /^---$/ { + if (state == 0) { state = 1; next } + if (state == 1) { exit } + } + state == 1 && $0 ~ "^"field":[[:space:]]+" { + sub("^"field":[[:space:]]+", "") + print + exit + } + ' "$file" + } + row_count=0 bad_count=0 while IFS= read -r -d '' row; do row_count=$((row_count + 1)) - id=$(awk '/^id:[[:space:]]+/{sub(/^id:[[:space:]]+/,""); print; exit}' "$row") - status=$(awk '/^status:[[:space:]]+/{sub(/^status:[[:space:]]+/,""); print; exit}' "$row") - title=$(awk '/^title:[[:space:]]+/{sub(/^title:[[:space:]]+/,""); print; exit}' "$row") + id=$(extract_frontmatter_field "$row" "id") + status=$(extract_frontmatter_field "$row" "status") + title=$(extract_frontmatter_field "$row" "title") if [ -z "$id" ] || [ -z "$status" ] || [ -z "$title" ]; then - echo " bad: $row (missing id/status/title)" >&2 + echo " bad: $row (missing id/status/title in frontmatter)" >&2 bad_count=$((bad_count + 1)) fi done < <(find docs/backlog -type f -name 'B-*.md' -print0)