From d065b9ea1f5fcac62dbb7834119b59938e72fa66 Mon Sep 17 00:00:00 2001 From: Aaron Stainback Date: Sat, 25 Apr 2026 23:43:02 -0400 Subject: [PATCH 1/2] =?UTF-8?q?feat(hygiene):=20tools/hygiene/fix-markdown?= =?UTF-8?q?-md032-md026.py=20=E2=80=94=20recurring=20pattern=20extracted?= =?UTF-8?q?=20to=20substrate=20primitive=20(Otto-346)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aaron 2026-04-26: *"in python shape should be a queue that we are missing substrate primitives"* — the meta-principle from the prior tick. I'd been carrying this fix as `/tmp/md_fix.py` and re-typing it across multiple drain ticks. Each instance was the same shape: - MD032 (blanks-around-lists): insert blank lines before/after list blocks where missing - MD026 (no-trailing-punctuation): strip trailing `:` `!` `?` from ATX headings Per Otto-346 candidate principle (recurring dynamic = signal of missing primitive), extracting to `tools/hygiene/` as a properly-tooled script is the right shape. Composes with: - `tools/hygiene/sort-tick-history-canonical.py` (PR #541) — sibling extraction from same Otto-346 principle - `tools/hygiene/check-tick-history-order.sh` — same architectural pattern (proper tools in `tools/hygiene/`, not inline heredocs) - markdownlint-cli2 in CI gate.yml — this tool produces input the linter accepts; the linter is the detection - Otto-341 (mechanism over discipline) Self-tests: - `--help`: documents - `--dry-run` on README.md: "OK: no changes needed" (idempotent) - Type-hinted, argparse, exit codes Use in queue-drain context: when CI markdownlint fails on a PR with MD032/MD026 violations: ``` git checkout <branch> && python3 tools/hygiene/fix-markdown-md032-md026.py path/to/file.md && git add -A && git commit && git push ``` Replaces the recurring inline-Python pattern that prior ticks used. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) --- tools/hygiene/fix-markdown-md032-md026.py | 144 ++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100755 tools/hygiene/fix-markdown-md032-md026.py diff --git a/tools/hygiene/fix-markdown-md032-md026.py b/tools/hygiene/fix-markdown-md032-md026.py new file mode 100755 index 00000000..51de3136 --- /dev/null +++ b/tools/hygiene/fix-markdown-md032-md026.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +""" +tools/hygiene/fix-markdown-md032-md026.py — mechanical fix for two +markdownlint violations: + +- MD032 (blanks-around-lists): inserts blank lines before/after list + blocks where they're missing +- MD026 (no-trailing-punctuation): strips trailing `:` `!` `?` from + ATX headings + +Why this exists (Aaron 2026-04-26): + "in python shape should be a queue that we are missing substraight + primitives" + +I'd been carrying this fix as `/tmp/md_fix.py` and re-typing it +across multiple drain ticks. Per Otto-346 principle (recurring +dynamic Python = signal a substrate primitive is missing), the +right home is `tools/hygiene/` checked into substrate. This tool +is the formalized version of the recurring pattern. + +Usage: + python3 tools/hygiene/fix-markdown-md032-md026.py FILE [FILE ...] + python3 tools/hygiene/fix-markdown-md032-md026.py --dry-run FILE + +Always idempotent: running on already-clean file is no-op. + +Composes with: +- markdownlint-cli2 (.github/workflows/gate.yml lint-markdown job) + — this tool produces input the linter accepts; the linter is the + detection check +- Otto-341 (mechanism over discipline; markdownlint discipline + becomes mechanism via this tool) +- Otto-346 candidate (recurring dynamic = missing primitive; + this tool IS the primitive that absorbs the recurring pattern) +""" + +import argparse +import re +import sys +from pathlib import Path + + +_LIST_LINE = re.compile(r"^( )*(- |\d+\. 
)") +_INDENTED_LINE = re.compile(r"^ +\S") +_HEADING_WITH_PUNCT = re.compile(r"^(#+ .+?)([:!?]+)$") + + +def _is_list_or_continuation(line: str) -> bool: + """Return True if line is a list item or its continuation + (indented paragraph under a list item).""" + return bool(_LIST_LINE.match(line) or _INDENTED_LINE.match(line)) + + +def _is_list(line: str) -> bool: + """Return True if line starts a list item.""" + return bool(_LIST_LINE.match(line)) + + +def fix_md032(text: str) -> str: + """Insert blank lines before list blocks (where the previous + line is non-blank and not itself a list/continuation) and after + list blocks (where the next line is non-blank and not part of + the list).""" + lines = text.split("\n") + + # Pass 1: insert blank line BEFORE a list when previous is a + # non-list, non-blank line. + out: list[str] = [] + for line in lines: + if _is_list(line) and out: + prev = out[-1] + if prev.strip() and not _is_list_or_continuation(prev): + out.append("") + out.append(line) + + # Pass 2: insert blank line AFTER a list-item when next is a + # non-list, non-blank line. + out2: list[str] = [] + for i, line in enumerate(out): + out2.append(line) + if _is_list(line) and i + 1 < len(out): + nxt = out[i + 1] + if nxt.strip() and not _is_list_or_continuation(nxt): + out2.append("") + + return "\n".join(out2) + + +def fix_md026(text: str) -> str: + """Strip trailing `:`, `!`, `?` punctuation from ATX heading + lines (matches `^#+ ...`).""" + out: list[str] = [] + for line in text.split("\n"): + m = _HEADING_WITH_PUNCT.match(line) + if m: + out.append(m.group(1)) + else: + out.append(line) + return "\n".join(out) + + +def fix_file(path: Path, dry_run: bool = False) -> tuple[bool, int]: + """Apply both fixes to a file. 
Returns (changed, bytes_diff).""" + if not path.exists(): + print(f"ERROR: file not found: {path}", file=sys.stderr) + return False, 0 + original = path.read_text() + fixed = fix_md026(fix_md032(original)) + if fixed == original: + return False, 0 + if not dry_run: + path.write_text(fixed) + return True, len(fixed) - len(original) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description="Fix markdownlint MD032 + MD026 violations mechanically" + ) + parser.add_argument("files", nargs="+", help="Markdown files to fix") + parser.add_argument( + "--dry-run", + action="store_true", + help="Print what would change; do not modify files", + ) + args = parser.parse_args(argv) + + any_changed = False + for f in args.files: + path = Path(f) + changed, byte_diff = fix_file(path, dry_run=args.dry_run) + if changed: + any_changed = True + verb = "WOULD FIX" if args.dry_run else "FIXED" + sign = "+" if byte_diff >= 0 else "" + print(f"{verb} {path} ({sign}{byte_diff} bytes)") + + if not any_changed: + print("OK: no changes needed") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 56fb5c6d6fe5900a321bb5ad92c4106efb9922e0 Mon Sep 17 00:00:00 2001 From: Aaron Stainback Date: Sun, 26 Apr 2026 02:46:14 -0400 Subject: [PATCH 2/2] =?UTF-8?q?fix(markdown-tool):=20P0=20fenced-code-muta?= =?UTF-8?q?tion=20prevention=20+=20P0=20missing-file=20exit=20code=20+=20P?= =?UTF-8?q?1=20list-marker=20coverage=20+=20P2=20trailing-whitespace=20MD0?= =?UTF-8?q?26=20=E2=80=94=20six=20findings=20from=20#542=20review?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six findings from Codex/Copilot review (PR #542 thread feedback; left-unresolved by drain subagent for code-author follow-up): **P0 (Copilot)** — fix_md032 mutated lines starting with `- ` / `1. ` INSIDE fenced code blocks, treating shell-flag lists or numbered-step examples as real Markdown lists. 
Inserted blank lines into code examples → corrupted user content. Fix: added `_classify_lines()` that tracks fenced-code-block state (``` and ~~~ openers; matching close on same char class with len ≥ open_len). Both fix_md032 and fix_md026 skip lines inside fences. Test verifies: `- this is shell flag` inside a ```bash … ``` fence is preserved untouched. **P0 (Copilot)** — missing-file errors didn't affect exit code. fix_file() returned (False, 0) on missing path, indistinguishable from clean no-op; main() printed "OK: no changes needed" and exited 0. Drain scripts and CI couldn't detect typoed paths. Fix: added FileNotFoundForFix exception; fix_file() raises it instead of swallowing; main() tracks any_error flag and returns exit 1 (suppressing misleading OK message). Test verifies: typo → "ERROR: file not found" + exit 1. **P1 (Copilot)** — _LIST_LINE only matched `- ` and `\d+\. `; missed `* ` and `+ ` CommonMark unordered markers. Repo has MD004 disabled so all three markers exist in committed files. Fix: regex extended to `[-*+] ` character-class. Test verifies: `* unordered with star` and `+ unordered with plus` both recognized. **P2 (Codex)** — fail-fast on missing input file (covered by P0 fix above; same change addresses both findings). **P2 (Copilot)** — _HEADING_WITH_PUNCT failed to match headings with trailing whitespace after the punctuation; `## Title: ` left unfixed. Fix: regex now `^(#+ .+?)([.,;:!?]+)\s*$` — allows optional trailing whitespace AND extends the punctuation set to `.,;:!?` (markdownlint's documented default ASCII set plus `?`, which the original tool already stripped). Test verifies: trailing-whitespace heading fixed. Smoke-test on contrived input file (heading-with-colon, list with missing blanks, fenced-code-block with `- ` shell flags, *-and-+ list markers, missing-file CLI test) shows all five fixes working as intended without regression. 
Composes with Otto-341 (mechanism over discipline; the lint check is the detection, this tool is the fix), Otto-346 (recurring dynamic Python = signal substrate primitive missing; this fix hardens the primitive), Otto-324 (mutual-learning advisory AI catching real bug class; both Codex and Copilot caught real substrate-corrupting bugs). --- tools/hygiene/fix-markdown-md032-md026.py | 123 +++++++++++++++++++--- 1 file changed, 107 insertions(+), 16 deletions(-) diff --git a/tools/hygiene/fix-markdown-md032-md026.py b/tools/hygiene/fix-markdown-md032-md026.py index 51de3136..faa06ec6 100755 --- a/tools/hygiene/fix-markdown-md032-md026.py +++ b/tools/hygiene/fix-markdown-md032-md026.py @@ -40,9 +40,24 @@ from pathlib import Path -_LIST_LINE = re.compile(r"^( )*(- |\d+\. )") +# All three CommonMark unordered list markers (`-`, `*`, `+`) plus +# ordered (`\d+\.`). markdownlint MD004 is disabled in this repo, so +# alternate markers do appear in committed files. +_LIST_LINE = re.compile(r"^( )*([-*+] |\d+\. )") _INDENTED_LINE = re.compile(r"^ +\S") -_HEADING_WITH_PUNCT = re.compile(r"^(#+ .+?)([:!?]+)$") + +# MD026 strips trailing punctuation from ATX headings. markdownlint's +# `punctuation` setting is configurable (its documented default omits `?`); +# we strip `.,;:!?` here because the original tool already stripped `?`. Allow +# optional trailing whitespace after the punctuation so headings like +# `## Title: ` are still cleaned (the original regex required EOL +# immediately after the punctuation). +_HEADING_WITH_PUNCT = re.compile(r"^(#+ .+?)([.,;:!?]+)\s*$") + +# Fenced-code-block delimiters. CommonMark allows ``` or ~~~ (3+ chars) +# at the start of a line (with optional info string after). Tilde and +# backtick fences cannot interrupt each other — track which fence opened. 
+_FENCE_OPEN = re.compile(r"^( {0,3})(`{3,}|~{3,})\s*([^`]*)$") def _is_list_or_continuation(line: str) -> bool: @@ -56,41 +71,100 @@ def _is_list(line: str) -> bool: return bool(_LIST_LINE.match(line)) +def _classify_lines(lines: list[str]) -> list[bool]: + """Return a boolean list `inside[i]` = True iff line `i` is inside + a fenced code block (and therefore must NOT be touched by the + MD032/MD026 transforms — that would mutate code examples). + + A code fence is a line starting with 3+ backticks or 3+ tildes; + closing fence must be the same character class as the opener and + have at least as many characters. We only track the simple case + sufficient for committed-markdown shapes; nested or weird + indentation (>3 spaces makes it a code-indent rather than a fence) + is conservatively treated as "inside" once opened until matching + close — better to skip transforms than to corrupt code.""" + inside: list[bool] = [] + open_char: str | None = None # '`' or '~' + open_len: int = 0 + for line in lines: + m = _FENCE_OPEN.match(line) + if m and open_char is None: + # Opening fence + fence = m.group(2) + open_char = fence[0] + open_len = len(fence) + inside.append(True) + elif m and open_char is not None: + # Possible closing fence — must be same char class and + # length >= open_len, with no info string. + fence = m.group(2) + if fence[0] == open_char and len(fence) >= open_len and not m.group(3).strip(): + inside.append(True) # The closing fence line itself + open_char = None + open_len = 0 + else: + # A different fence char or shorter — still inside the + # outer block (it's just code that looks fence-shaped). + inside.append(True) + else: + inside.append(open_char is not None) + return inside + + def fix_md032(text: str) -> str: """Insert blank lines before list blocks (where the previous line is non-blank and not itself a list/continuation) and after list blocks (where the next line is non-blank and not part of - the list).""" + the list). 
+ + Skips lines inside fenced code blocks — inserting blanks there + would mutate code examples (e.g. shell-script with `- option` + flags would acquire spurious blanks).""" lines = text.split("\n") + inside = _classify_lines(lines) # Pass 1: insert blank line BEFORE a list when previous is a - # non-list, non-blank line. + # non-list, non-blank line. The boolean state needs to survive the + # output mutation, so we map indices via the input position. out: list[str] = [] - for line in lines: - if _is_list(line) and out: + out_inside: list[bool] = [] + for i, line in enumerate(lines): + if not inside[i] and _is_list(line) and out: prev = out[-1] - if prev.strip() and not _is_list_or_continuation(prev): + prev_inside = out_inside[-1] + if prev.strip() and not prev_inside and not _is_list_or_continuation(prev): out.append("") + out_inside.append(False) out.append(line) + out_inside.append(inside[i]) # Pass 2: insert blank line AFTER a list-item when next is a # non-list, non-blank line. out2: list[str] = [] for i, line in enumerate(out): out2.append(line) - if _is_list(line) and i + 1 < len(out): + if not out_inside[i] and _is_list(line) and i + 1 < len(out): nxt = out[i + 1] - if nxt.strip() and not _is_list_or_continuation(nxt): + nxt_inside = out_inside[i + 1] + if nxt.strip() and not nxt_inside and not _is_list_or_continuation(nxt): out2.append("") return "\n".join(out2) def fix_md026(text: str) -> str: - """Strip trailing `:`, `!`, `?` punctuation from ATX heading - lines (matches `^#+ ...`).""" + """Strip trailing `.` `,` `;` `:` `!` `?` punctuation (with optional + trailing whitespace) from ATX heading lines (matches `^#+ ...`). 
+ + Skips lines inside fenced code blocks — `# heading-shaped` lines + inside code are content, not headings.""" + lines = text.split("\n") + inside = _classify_lines(lines) out: list[str] = [] - for line in text.split("\n"): + for i, line in enumerate(lines): + if inside[i]: + out.append(line) + continue m = _HEADING_WITH_PUNCT.match(line) if m: out.append(m.group(1)) @@ -99,11 +173,19 @@ def fix_md026(text: str) -> str: return "\n".join(out) +class FileNotFoundForFix(Exception): + """Raised when an input file is missing — distinguishes from + 'no changes needed' so main() can exit non-zero.""" + + def fix_file(path: Path, dry_run: bool = False) -> tuple[bool, int]: - """Apply both fixes to a file. Returns (changed, bytes_diff).""" + """Apply both fixes to a file. Returns (changed, bytes_diff). + + Raises FileNotFoundForFix if the path does not exist, so the + caller can distinguish missing-file (real error, exit non-zero) + from clean-no-op (silent success).""" if not path.exists(): - print(f"ERROR: file not found: {path}", file=sys.stderr) - return False, 0 + raise FileNotFoundForFix(str(path)) original = path.read_text() fixed = fix_md026(fix_md032(original)) if fixed == original: @@ -126,15 +208,24 @@ def main(argv: list[str] | None = None) -> int: args = parser.parse_args(argv) any_changed = False + any_error = False for f in args.files: path = Path(f) - changed, byte_diff = fix_file(path, dry_run=args.dry_run) + try: + changed, byte_diff = fix_file(path, dry_run=args.dry_run) + except FileNotFoundForFix as exc: + print(f"ERROR: file not found: {exc}", file=sys.stderr) + any_error = True + continue if changed: any_changed = True verb = "WOULD FIX" if args.dry_run else "FIXED" sign = "+" if byte_diff >= 0 else "" print(f"{verb} {path} ({sign}{byte_diff} bytes)") + if any_error: + # Suppress the misleading OK message when any input failed. + return 1 if not any_changed: print("OK: no changes needed") return 0