-
Notifications
You must be signed in to change notification settings - Fork 1
feat(hygiene): tools/hygiene/sort-tick-history-canonical.py — substrate-built-in instead of dynamic Python #541
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,10 +45,29 @@ | |
|
|
||
| import argparse | ||
| import re | ||
| import subprocess | ||
| import sys | ||
| from pathlib import Path | ||
|
|
||
|
|
||
| def repo_root() -> Path | None: | ||
| """Resolve the repo root via `git rev-parse --show-toplevel`. | ||
|
|
||
| Mirrors sibling hygiene scripts so that running this tool from a | ||
| subdirectory still resolves the default `--file` correctly. Returns | ||
| None if not in a git repo (caller falls back to CWD).""" | ||
| try: | ||
| out = subprocess.run( | ||
| ["git", "rev-parse", "--show-toplevel"], | ||
| capture_output=True, | ||
| text=True, | ||
| check=True, | ||
| ) | ||
| return Path(out.stdout.strip()) | ||
| except (subprocess.CalledProcessError, FileNotFoundError): | ||
| return None | ||
|
|
||
|
|
||
| def find_separator_line(lines: list[str]) -> int | None: | ||
| """Return the line index of the markdown table separator (last | ||
| occurrence — the file has a sample schema row earlier in prose).""" | ||
|
|
@@ -95,17 +114,80 @@ def sort_canonical(text: str) -> tuple[str, dict]: | |
| header = lines[: sep_idx + 1] | ||
| data = lines[sep_idx + 1 :] | ||
|
|
||
| # File-line offset for converting post-separator indices into | ||
| # 1-based file line numbers in error diagnostics. Reviewer P2: | ||
| # 0-based post-separator indices were confusing; the user wants | ||
| # to grep / open-at-line, not arithmetic in their head. | ||
| sep_file_line = sep_idx + 1 # 1-based line number of separator | ||
|
|
||
| data_rows: list[tuple[str, int, str]] = [] | ||
| unmatched_table_rows: list[tuple[int, str]] = [] | ||
| for original_index, line in enumerate(data): | ||
| if not line.strip(): | ||
| continue | ||
| ts = get_timestamp(line) | ||
| if ts: | ||
| data_rows.append((ts, original_index, line)) | ||
| elif line.lstrip().startswith("|"): | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Useful? React with 👍 / 👎. |
||
| # Looks like a table row but no timestamp matched — | ||
| # schema drift or malformed row. Refuse to silently drop; | ||
| # caller decides whether to fail or skip after seeing | ||
| # the count. | ||
| unmatched_table_rows.append((original_index, line)) | ||
|
|
||
| rows_in = len(data_rows) | ||
| original_order = [line for _, _, line in data_rows] | ||
|
|
||
| # Trailing non-row content after the table — anything that isn't | ||
| # blank and isn't a table row must be preserved (Codex P2 finding: | ||
| # naive header+rows reconstruction would lose trailing prose). | ||
| # Find the index of the last table-shaped line (matched OR | ||
| # unmatched); everything after that index is trailing content. | ||
| trailing_lines: list[str] = [] | ||
| table_indices = sorted( | ||
| [idx for _, idx, _ in data_rows] | ||
| + [idx for idx, _ in unmatched_table_rows] | ||
| ) | ||
| if table_indices: | ||
| last_table_idx = table_indices[-1] | ||
| # Trailing = lines AFTER the last table-row. Strip leading | ||
| # blank-line separator(s) so the reconstructed file gets a | ||
| # single canonical blank between table-end and prose-start. | ||
| trailing_candidate = data[last_table_idx + 1 :] | ||
| first_non_blank = next( | ||
| (i for i, line in enumerate(trailing_candidate) if line.strip()), | ||
| len(trailing_candidate), | ||
| ) | ||
| if first_non_blank < len(trailing_candidate): | ||
| trailing_lines = trailing_candidate[first_non_blank:] | ||
|
|
||
| # P0 guard: if the data region has table-shaped lines but zero | ||
| # match the timestamp regex, the schema has drifted and the | ||
| # naive write-back would wipe the table. Refuse. | ||
| if rows_in == 0 and unmatched_table_rows: | ||
| first_orig = unmatched_table_rows[0][0] | ||
| raise ValueError( | ||
| f"schema drift: {len(unmatched_table_rows)} table-shaped row(s) " | ||
| f"found but ZERO matched the ISO-8601 timestamp regex. " | ||
| f"Refusing to write — would wipe tick-history. " | ||
| f"First unmatched row at file-line " | ||
| f"{sep_file_line + 1 + first_orig}: " | ||
| f"{unmatched_table_rows[0][1][:120]}" | ||
| ) | ||
|
|
||
| # P1 guard: any unmatched table row is a discipline violation; | ||
| # silently dropping rows is exactly the failure mode this script | ||
| # is supposed to prevent. Surface and refuse. | ||
| if unmatched_table_rows: | ||
| first = unmatched_table_rows[0] | ||
| raise ValueError( | ||
| f"refusing to drop {len(unmatched_table_rows)} unmatched " | ||
| f"table row(s); per Otto-229 (append-only discipline) the " | ||
| f"sort tool must not silently lose rows. " | ||
| f"First unmatched at file-line " | ||
| f"{sep_file_line + 1 + first[0]}: {first[1][:120]}" | ||
| ) | ||
|
AceHack marked this conversation as resolved.
|
||
|
|
||
| # Stable sort by (timestamp, original_index) so ties preserve | ||
| # input order. ISO-8601 strings sort lex == chronological. | ||
| data_rows.sort(key=lambda x: (x[0], x[1])) | ||
|
|
@@ -118,7 +200,17 @@ def sort_canonical(text: str) -> tuple[str, dict]: | |
| seen.add(line) | ||
| unique_rows.append(line) | ||
|
|
||
| new_text = "\n".join(header) + "\n" + "\n".join(unique_rows) + "\n" | ||
| # Reconstruct: header + sorted-rows + (blank-line separator + | ||
| # trailing prose if any). Without trailing-content preservation | ||
| # the naive header + rows reconstruction would silently drop any | ||
| # post-table prose paragraph (Codex P2 finding). The blank-line | ||
| # separator between table-end and trailing-content is required by | ||
| # CommonMark to terminate the table and start a new block. | ||
| parts = ["\n".join(header), "\n".join(unique_rows)] | ||
| if trailing_lines: | ||
| parts.append("") # explicit blank-line separator | ||
| parts.append("\n".join(trailing_lines)) | ||
| new_text = "\n".join(parts) + "\n" | ||
| rows_out = len(unique_rows) | ||
| reordered = unique_rows != original_order[: len(unique_rows)] or rows_out != rows_in | ||
|
|
||
|
|
@@ -127,6 +219,7 @@ def sort_canonical(text: str) -> tuple[str, dict]: | |
| "rows_out": rows_out, | ||
| "duplicates_removed": rows_in - rows_out, | ||
| "reordered": reordered, | ||
| "trailing_lines_preserved": len(trailing_lines), | ||
| } | ||
|
|
||
|
|
||
|
|
@@ -142,11 +235,20 @@ def main(argv: list[str] | None = None) -> int: | |
| parser.add_argument( | ||
| "--file", | ||
| default="docs/hygiene-history/loop-tick-history.md", | ||
| help="Path to tick-history file (relative to repo root)", | ||
| help="Path to tick-history file (relative paths resolve to repo " | ||
| "root via 'git rev-parse --show-toplevel'; if not in a git " | ||
| "checkout, falls back to current working directory)", | ||
| ) | ||
| args = parser.parse_args(argv) | ||
|
|
||
| # Resolve --file relative to repo root when it is a relative path, | ||
| # so the tool works the same whether invoked from repo root or any | ||
| # subdirectory. Sibling hygiene scripts share this convention. | ||
| p = Path(args.file) | ||
| if not p.is_absolute(): | ||
| root = repo_root() | ||
| if root is not None: | ||
| p = root / args.file | ||
| if not p.exists(): | ||
| print(f"ERROR: file not found: {p}", file=sys.stderr) | ||
| return 1 | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.