diff --git a/docs/trajectories/typescript-bun-migration/RESUME.md b/docs/trajectories/typescript-bun-migration/RESUME.md index d28b51595..e9a26d426 100644 --- a/docs/trajectories/typescript-bun-migration/RESUME.md +++ b/docs/trajectories/typescript-bun-migration/RESUME.md @@ -1,9 +1,9 @@ # Trajectory — TypeScript / Bun migration -**Status**: Active (Lane B slice 19 merged — [#902](https://github.com/Lucent-Financial-Group/Zeta/pull/902); slice 20 in flight — `lane-b/ts-bun-slice-20-batch-resolve-pr-threads-2026-04-30`) -**Milestone**: 40 ported + 1 in-flight = 41 total. Budget cluster (14/18/19) and peer-call cluster (15/16/17) both complete. After slice 20 lands, Bucket B reduces to 1 unported file (`tools/pr-preservation/archive-pr.sh` 674L — bash+Python mix). Bucket C: 2 (`tools/hygiene/check-github-settings-drift.sh` + `tools/hygiene/snapshot-github-settings.sh`). +**Status**: Active (Lane B slice 20 merged — [#907](https://github.com/Lucent-Financial-Group/Zeta/pull/907); slice 21 in flight — `lane-b/ts-bun-slice-21-archive-pr-2026-04-30`) +**Milestone**: 41 ported + 1 in-flight = 42 total. Budget cluster (14/18/19) and peer-call cluster (15/16/17) both complete; git cluster complete (13/20). Slice 21 (archive-pr.sh — last Bucket B file) is in flight; **after slice 21 lands, Bucket B is empty**. Bucket C: 2 (`tools/hygiene/check-github-settings-drift.sh` + `tools/hygiene/snapshot-github-settings.sh`). **Current blocker**: None. -**Next concrete action**: After slice 20 merges, slice 21 = `tools/pr-preservation/archive-pr.sh` is the last Bucket B file (674 lines, bash+Python mix — most complex remaining). Per Gate B: read-only scope first when possible — but the remaining file is state-mutating (gh API + writes drain logs). +**Next concrete action**: After slice 21 merges, **Bucket B is empty** — the TS+Bun migration trajectory transitions from "porting" phase to "soak + bash retirement" phase. Bucket C (2 files using gh-api heavily) requires maintainer decision on shell-out vs Octokit. Bucket A (14 setup-script files) stays bash by design. **Last updated**: 2026-04-30 ## Why this trajectory exists diff --git a/docs/trajectories/typescript-bun-migration/slice-audits.md b/docs/trajectories/typescript-bun-migration/slice-audits.md index 24da77a90..3463802fe 100644 --- a/docs/trajectories/typescript-bun-migration/slice-audits.md +++ b/docs/trajectories/typescript-bun-migration/slice-audits.md @@ -411,7 +411,41 @@ Per-port pattern checklist: Slice 6 passes audit. No new patterns recorded — all reused from prior slices. -## Slice 20 — 1 port (git/batch-resolve-pr-threads — last git-cluster port) (PR pending — `lane-b/ts-bun-slice-20-batch-resolve-pr-threads-2026-04-30`) +## Slice 21 — 1 port (pr-preservation/archive-pr — last Bucket B file) (PR pending — `lane-b/ts-bun-slice-21-archive-pr-2026-04-30`) + +**Slice files**: + +- `tools/pr-preservation/archive-pr.{sh→ts}` (PR-conversation preservation — fetches review threads + reviews + general comments via gh GraphQL and writes `docs/pr-discussions/PR-<NNNN>-<slug>.md` archive; closes Bucket B) + +**Comparison points**: identical to slice 20. Within Gate B 30-day window. tsc gate active per #890. + +### Code-pattern audit (per-port) + +- **`archive-pr.ts`** (674 → 806 lines): the bash original was 217 lines bash + ~457 lines embedded Python (GraphQL fetcher + Markdown formatter). The TS port collapses both into a single Bun runtime — no more bash/Python boundary, no more `mktemp` + temp-file shuffle, no `set +e` to capture Python's exit code.
Drops Python from runtime deps entirely. The TS line count is larger than bash because explicit type interfaces (PullRequest / ThreadNode / ReviewNode / CommentNode / etc.) replace Python's untyped dict navigation; the explicit typing is the cost of the language change, paid once at port time. +- **3-axis paginated GraphQL fetch** preserved 1:1: top-level `reviewThreads` (100/page) + `reviews` (50/page) + `comments` (100/page), then per-thread comments (100/page) for threads with `>100` comments. Generic helper `paginateTopLevel` handles the cursor loop with a type-safe `extractor` callback per connection. `paginateThreadComments` handles the per-thread case. +- **NWO parsing with Enterprise HOST/OWNER/REPO support** preserved verbatim per Codex P2 #846. `parseRepoNwo` accepts 2-segment (github.com default) or 3-segment (HOST must contain a dot — rejects `owner/repo/extra` ambiguity). Slash-injection defence on owner/name preserved. +- **Idempotency via PR-NNNN glob** (Otto-235): `findExistingArchive` reads `docs/pr-discussions/` + filters on the `PR-<NNNN>-` prefix, sorts deterministically, reuses first match. Title edits update in-place rather than orphaning the old slug. +- **Markdown post-processor with CommonMark §4.5 fence handling** preserved: `detectFenceMarker` enforces leading-space-count ≤ 3 + no tab in prefix; closer must match marker char (backtick/tilde) AND length ≥ opener (allows nested fences via longer opener). Inside fences, no normalization — audit fidelity wins. Outside fences, whitespace-only lines become empty and runs of 3+ blank lines collapse to 2. +- **Python `json.dumps` ensure_ascii=True** for YAML-quoted titles required a non-trivial fix: `JSON.stringify` in JS preserves non-ASCII characters as-is (the right-arrow stays a literal Unicode codepoint in output); Python's default escapes them to `\uXXXX` form (e.g. the right-arrow becomes `\u2192`, the em-dash `\u2014`). The TS port's `yamlQuote` post-processes the JSON output, replacing each non-ASCII codepoint with its `\uXXXX` form to match Python's wire-format default. Both bash and TS now emit identical `\uXXXX`-escaped strings. + +### Equivalence audit + +Diff'd against bash output on this repo state (2026-04-30 main, run against PR #902): + +- **Argument-validation paths**: same exit code (1) and same error-message body on 2 sampled paths — no args, `abc`. The usage-line script-path is intentionally NOT byte-equivalent: bash echoes `$0` (showing the actual `./tools/pr-preservation/archive-pr.sh` path), TS hard-codes `bun tools/pr-preservation/archive-pr.ts` to give the user the form they should run. Same self-describing-line carve-out as the `archive_tool` YAML field. Note exit code 1 (not 2) on argument errors here — consistent with the bash original; differs from the slice 18/19/20 budget+git scripts (those use 2 for arg errors). +- **Live archive run on PR #902** (4 threads, 2 reviews, 0 comments): byte-equivalent EXCEPT `archived_at` (timestamp) + `archive_tool` (.sh vs .ts — deliberate self-reference). Title with non-ASCII characters (right-arrow + em-dash) escapes correctly to `\u2192` and `\u2014`, matching Python's `json.dumps` default. + +### Behavioural note vs bash original + +- The bash + Python mix drops to a single Bun runtime. The two-stage Python invocation (validation + formatter) collapses into a single TS function call. +- The `mktemp` temp file and `trap 'rm -f "$TMP"' EXIT` cleanup are removed — no temp file needed; the fetched data lives in memory.
+- Markdown post-processing fence handling preserved CommonMark §4.5 strictly: opening fence ≤ 3 leading spaces + no tab in prefix; closing fence same marker char + length ≥ opener. + +### Outcome + +Slice 21 passes audit. **Bucket B closed** (after this PR merges, every file flagged for TS port has been ported). The TS+Bun migration trajectory transitions from "porting" phase to "soak + bash retirement" phase. Bucket C (2 files using gh-api heavily) remains pending maintainer decision (shell-out vs Octokit). Bucket A (14 setup-script files) stays bash by design. + +## Slice 20 — 1 port (git/batch-resolve-pr-threads — last git-cluster port) (PR #907, merged 2026-04-30, commit `a8e15f3`) **Slice files**: diff --git a/tools/pr-preservation/archive-pr.ts b/tools/pr-preservation/archive-pr.ts new file mode 100644 index 000000000..4fdbbcf48 --- /dev/null +++ b/tools/pr-preservation/archive-pr.ts @@ -0,0 +1,806 @@ +#!/usr/bin/env bun +// archive-pr.ts — minimal git-native PR-conversation preservation (Otto-207). +// TypeScript+Bun port of archive-pr.sh, slice 21 of the TS+Bun migration. +// +// Fetches a PR's review threads + general comments + reviews via +// `gh api graphql` and writes them to docs/pr-discussions/PR-<NNNN>-<slug>.md +// for an audit trail outside of GitHub. The bash original embedded ~400 lines +// of Python for GraphQL pagination + markdown formatting; this TS port +// removes the Python dependency entirely (single runtime: Bun). +// +// Usage: +// bun tools/pr-preservation/archive-pr.ts <pr-number> +// +// Output: writes docs/pr-discussions/PR-<NNNN>-<slug>.md with YAML +// frontmatter (pr_number, title, author, merged_at, state, archived_at) +// + all review threads + reviews + general PR comments. PR numbers are +// zero-padded to four digits in the filename so archives sort +// lexicographically in the same order as numerically up to PR #9999. +// +// Idempotency: PR number is the canonical archive key. On re-archive, +// detect an existing PR-<NNNN>-*.md file and reuse its path regardless +// of current title, so title edits update in place rather than orphaning +// the old slug. +// +// Exit codes: +// 0 success +// 1 missing arg / gh CLI not authenticated / repo detect failed / +// not inside a git working tree +// 2 PR fetch failed (auth / network / GraphQL errors / not found / +// pullRequest: null / formatter errors) + +import { spawnSync } from "node:child_process"; +import { mkdirSync, readdirSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; + +const SPAWN_MAX_BUFFER = 64 * 1024 * 1024; + +interface ArgError { + readonly error: string; + readonly exitCode: 1 | 2; +} + +interface ParsedArgs { + readonly pr: number; +} + +function parseArgs(argv: readonly string[]): ParsedArgs | ArgError { + if (argv.length < 1) { + return { + error: "usage: bun tools/pr-preservation/archive-pr.ts <pr-number>", + exitCode: 1, + }; + } + const prRaw = argv[0] ?? ""; + // Validate PR is a positive integer before invoking gh.
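+ // (The regex alone would accept "0"; the pr <= 0 check below rejects it, so only strictly positive integers reach gh.)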
+ if (!/^[0-9]+$/.test(prRaw)) { + return { + error: `error: PR number must be a non-empty positive integer (got: '${prRaw}')\nusage: bun tools/pr-preservation/archive-pr.ts <pr-number>`, + exitCode: 1, + }; + } + const pr = Number.parseInt(prRaw, 10); + if (pr <= 0) { + return { + error: `error: PR number must be a non-empty positive integer (got: '${prRaw}')\nusage: bun tools/pr-preservation/archive-pr.ts <pr-number>`, + exitCode: 1, + }; + } + return { pr }; +} + +function gitRevParseShowToplevel(): string | ArgError { + // eslint-disable-next-line sonarjs/no-os-command-from-path + const result = spawnSync("git", ["rev-parse", "--show-toplevel"], { + encoding: "utf8", + maxBuffer: SPAWN_MAX_BUFFER, + }); + if (result.status !== 0) { + return { + error: + "error: not inside a git working tree. archive-pr.ts must run from a Zeta checkout so the docs/pr-discussions/ output lives in the right repo.", + exitCode: 1, + }; + } + return result.stdout.trim(); +} + +interface RepoNwo { + readonly owner: string; + readonly name: string; + readonly host: string; +} + +function detectRepoNwo(): string | ArgError { + // Resolution order: + // 1. GH_REPO env var (e.g. GH_REPO=AceHack/Zeta) — needed for cross-fork + // archives. + // 2. `gh repo view --json nameWithOwner --jq .nameWithOwner` fallback. + const envRepo = process.env["GH_REPO"]; + if (envRepo !== undefined && envRepo.length > 0) return envRepo; + // eslint-disable-next-line sonarjs/no-os-command-from-path + const result = spawnSync( + "gh", + ["repo", "view", "--json", "nameWithOwner", "--jq", ".nameWithOwner"], + { + encoding: "utf8", + maxBuffer: SPAWN_MAX_BUFFER, + }, + ); + if (result.status !== 0) { + return { + error: + "error: could not detect repo (need GH_REPO=[HOST/]OWNER/REPO env var or 'gh repo view' to succeed). Is gh authenticated and this a GitHub repo?", + exitCode: 1, + }; + } + return result.stdout.trim(); +} + +function parseRepoNwo(nwo: string): RepoNwo | ArgError { + // Strict parser per gh CLI docs: `[HOST/]OWNER/REPO`. + // - 2-segment: github.com (default). + // - 3-segment HOST/OWNER/REPO: HOST must contain a dot. + // - 4+ segments: reject. + const segments = nwo.split("/"); + if (segments.length === 2) { + const owner = segments[0] ?? ""; + const name = segments[1] ?? ""; + if (owner.length === 0 || name.length === 0) { + return { + error: `error: could not detect repo (need GH_REPO=[HOST/]OWNER/REPO env var or 'gh repo view' to succeed). Is gh authenticated and this a GitHub repo? Got: '${nwo}'`, + exitCode: 1, + }; + } + return { owner, name, host: "" }; + } + if (segments.length === 3) { + const host = segments[0] ?? ""; + const owner = segments[1] ?? ""; + const name = segments[2] ?? ""; + if (owner.length === 0 || name.length === 0) { + return { + error: `error: could not detect repo (need GH_REPO=[HOST/]OWNER/REPO env var or 'gh repo view' to succeed). Is gh authenticated and this a GitHub repo? Got: '${nwo}'`, + exitCode: 1, + }; + } + if (!host.includes(".")) { + return { + error: `error: GH_REPO 3-segment form must be HOST/OWNER/REPO where HOST is a hostname containing a dot (e.g. github.example.com/owner/repo). Got: '${nwo}'`, + exitCode: 1, + }; + } + return { owner, name, host }; + } + return { + error: `error: could not detect repo (need GH_REPO=[HOST/]OWNER/REPO env var or 'gh repo view' to succeed). Is gh authenticated and this a GitHub repo? 
Got: '${nwo}'`, + exitCode: 1, + }; +} + +interface AuthorNode { + readonly login?: string; +} + +interface CommentNode { + readonly author?: AuthorNode; + readonly body?: string; + readonly createdAt?: string; + readonly updatedAt?: string; +} + +interface PageInfo { + readonly hasNextPage?: boolean; + readonly endCursor?: string | null; +} + +interface CommentsConn { + readonly pageInfo?: PageInfo; + readonly nodes?: readonly CommentNode[]; +} + +interface ThreadNode { + readonly id?: string; + readonly isResolved?: boolean; + readonly path?: string | null; + readonly line?: number | null; + readonly originalLine?: number | null; + readonly comments?: CommentsConn; +} + +interface ReviewNode { + readonly author?: AuthorNode; + readonly state?: string; + readonly body?: string; + readonly submittedAt?: string; +} + +interface ThreadsConn { + readonly pageInfo?: PageInfo; + readonly nodes?: readonly ThreadNode[]; +} + +interface ReviewsConn { + readonly pageInfo?: PageInfo; + readonly nodes?: readonly ReviewNode[]; +} + +interface PrCommentsConn { + readonly pageInfo?: PageInfo; + readonly nodes?: readonly CommentNode[]; +} + +interface PullRequest { + readonly number?: number; + readonly title?: string; + readonly author?: AuthorNode; + readonly state?: string; + readonly createdAt?: string; + readonly mergedAt?: string | null; + readonly closedAt?: string | null; + readonly headRefName?: string; + readonly baseRefName?: string; + readonly body?: string; + readonly reviewThreads?: ThreadsConn; + readonly reviews?: ReviewsConn; + readonly comments?: PrCommentsConn; +} + +interface GraphQLResponse { + readonly data?: { + readonly repository?: { readonly pullRequest?: PullRequest | null } | null; + readonly node?: { + readonly comments?: CommentsConn; + } | null; + }; + readonly errors?: readonly unknown[]; +} + +const TOP_QUERY = ` + query($owner: String!, $name: String!, $number: Int!, + $threadsAfter: String, $commentsAfter: String, $reviewsAfter: String) { + repository(owner: $owner, name: $name) { + pullRequest(number: $number) { + number + title + author { login } + state + createdAt + mergedAt + closedAt + headRefName + baseRefName + body + reviewThreads(first: 100, after: $threadsAfter) { + pageInfo { hasNextPage endCursor } + nodes { + id + isResolved + path + line + originalLine + comments(first: 100) { + pageInfo { hasNextPage endCursor } + nodes { author { login } body createdAt updatedAt } + } + } + } + reviews(first: 50, after: $reviewsAfter) { + pageInfo { hasNextPage endCursor } + nodes { author { login } state body submittedAt } + } + comments(first: 100, after: $commentsAfter) { + pageInfo { hasNextPage endCursor } + nodes { author { login } body createdAt } + } + } + } + } +`; + +const THREAD_COMMENTS_QUERY = ` + query($threadId: ID!, $after: String) { + node(id: $threadId) { + ... on PullRequestReviewThread { + comments(first: 100, after: $after) { + pageInfo { hasNextPage endCursor } + nodes { author { login } body createdAt updatedAt } + } + } + } + } +`; + +interface FetchError { + readonly error: string; + readonly exitCode: 2; +} + +function ghGraphQL(args: { + readonly host: string; + readonly query: string; + readonly variables: Readonly<Record<string, string | number | null>>; +}): GraphQLResponse | FetchError { + // Honour HOST when REPO_HOST was parsed from a 3-segment GH_REPO + // (Codex P2 #846).
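+ // gh api field flags: -f/--raw-field sends values as strings, -F/--field sends typed values, so numbers go through -F below ($number stays a GraphQL Int) while cursors stay strings via -f.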
+ const cmd: string[] = ["api"]; + if (args.host.length > 0) { + cmd.push("--hostname", args.host); + } + cmd.push("graphql", "-f", `query=${args.query}`); + for (const [k, v] of Object.entries(args.variables)) { + if (v === null) continue; + if (typeof v === "number") cmd.push("-F", `${k}=${String(v)}`); + else cmd.push("-f", `${k}=${v}`); + } + // eslint-disable-next-line sonarjs/no-os-command-from-path + const result = spawnSync("gh", cmd, { + encoding: "utf8", + maxBuffer: SPAWN_MAX_BUFFER, + }); + if (result.status !== 0) { + return { + error: `gh api graphql failed (exit ${String(result.status ?? "null")}):\n${result.stderr}`, + exitCode: 2, + }; + } + let parsed: GraphQLResponse; + try { + parsed = JSON.parse(result.stdout) as GraphQLResponse; + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + return { + error: `non-JSON response from gh api graphql: ${message}\n${result.stdout.slice(0, 2000)}`, + exitCode: 2, + }; + } + if (parsed.errors !== undefined && parsed.errors.length > 0) { + return { + error: `GraphQL errors:\n${JSON.stringify(parsed.errors, null, 2)}`, + exitCode: 2, + }; + } + return parsed; +} + +interface FetchedPr { + readonly pr: PullRequest; + readonly threads: readonly ThreadNode[]; + readonly reviews: readonly ReviewNode[]; + readonly comments: readonly CommentNode[]; +} + +function paginateTopLevel<T>(args: { + readonly host: string; + readonly owner: string; + readonly name: string; + readonly prNumber: number; + readonly initialNodes: readonly T[]; + readonly initialPageInfo: PageInfo | undefined; + readonly variableName: string; + readonly extractor: (page: PullRequest) => { nodes: readonly T[]; pageInfo: PageInfo | undefined }; +}): readonly T[] | FetchError { + const all: T[] = [...args.initialNodes]; + let cursor: string | null = + args.initialPageInfo?.hasNextPage === true ? args.initialPageInfo.endCursor ?? null : null; + while (cursor !== null && cursor.length > 0) { + const page = ghGraphQL({ + host: args.host, + query: TOP_QUERY, + variables: { + owner: args.owner, + name: args.name, + number: args.prNumber, + [args.variableName]: cursor, + }, + }); + if ("error" in page) return page; + const pagePr = page.data?.repository?.pullRequest; + if (pagePr === null || pagePr === undefined) break; + const conn = args.extractor(pagePr); + for (const n of conn.nodes) all.push(n); + cursor = conn.pageInfo?.hasNextPage === true ? conn.pageInfo.endCursor ?? null : null; + } + return all; +} + +function paginateThreadComments(args: { + readonly host: string; + readonly threadId: string; + readonly initialNodes: readonly CommentNode[]; + readonly initialPageInfo: PageInfo | undefined; +}): readonly CommentNode[] | FetchError { + const all: CommentNode[] = Array.from(args.initialNodes); + let cursor: string | null = + args.initialPageInfo?.hasNextPage === true ? args.initialPageInfo.endCursor ?? null : null; + while (cursor !== null && cursor.length > 0) { + const page = ghGraphQL({ + host: args.host, + query: THREAD_COMMENTS_QUERY, + variables: { threadId: args.threadId, after: cursor }, + }); + if ("error" in page) return page; + const conn = page.data?.node?.comments; + if (conn === undefined) break; + for (const c of conn.nodes ?? []) all.push(c); + cursor = conn.pageInfo?.hasNextPage === true ? conn.pageInfo.endCursor ?? 
null : null; + } + return all; +} + +function fetchPullRequest(args: { + readonly host: string; + readonly owner: string; + readonly name: string; + readonly prNumber: number; +}): FetchedPr | FetchError { + // First page. + const first = ghGraphQL({ + host: args.host, + query: TOP_QUERY, + variables: { owner: args.owner, name: args.name, number: args.prNumber }, + }); + if ("error" in first) return first; + const pr = first.data?.repository?.pullRequest; + if (pr === null || pr === undefined) { + return { + error: `pullRequest is null for ${args.owner}/${args.name}#${String(args.prNumber)} (not found, private, or access denied).`, + exitCode: 2, + }; + } + // Paginate top-level connections. + const threadsResult = paginateTopLevel({ + ...args, + initialNodes: pr.reviewThreads?.nodes ?? [], + initialPageInfo: pr.reviewThreads?.pageInfo, + variableName: "threadsAfter", + extractor: (p) => ({ nodes: p.reviewThreads?.nodes ?? [], pageInfo: p.reviewThreads?.pageInfo }), + }); + if ("error" in threadsResult) return threadsResult; + const reviewsResult = paginateTopLevel({ + ...args, + initialNodes: pr.reviews?.nodes ?? [], + initialPageInfo: pr.reviews?.pageInfo, + variableName: "reviewsAfter", + extractor: (p) => ({ nodes: p.reviews?.nodes ?? [], pageInfo: p.reviews?.pageInfo }), + }); + if ("error" in reviewsResult) return reviewsResult; + const commentsResult = paginateTopLevel({ + ...args, + initialNodes: pr.comments?.nodes ?? [], + initialPageInfo: pr.comments?.pageInfo, + variableName: "commentsAfter", + extractor: (p) => ({ nodes: p.comments?.nodes ?? [], pageInfo: p.comments?.pageInfo }), + }); + if ("error" in commentsResult) return commentsResult; + // Per-thread comments pagination — each thread may have its own >100 + // comment count. + const expandedThreads: ThreadNode[] = []; + for (const t of threadsResult) { + if (t.id === undefined) { + expandedThreads.push(t); + continue; + } + const threadComments = paginateThreadComments({ + host: args.host, + threadId: t.id, + initialNodes: t.comments?.nodes ?? [], + initialPageInfo: t.comments?.pageInfo, + }); + if ("error" in threadComments) return threadComments; + expandedThreads.push({ + ...t, + comments: { nodes: threadComments }, + }); + } + return { + pr, + threads: expandedThreads, + reviews: reviewsResult, + comments: commentsResult, + }; +} + +function makeSlug(title: string): string { + // Match Python `re.sub(r'[^a-zA-Z0-9]+', '-', title).strip('-').lower()[:60].strip('-') or 'untitled'`. + let slug = title.replace(/[^a-zA-Z0-9]+/g, "-"); + slug = slug.replace(/^-+/, "").replace(/-+$/, "").toLowerCase(); + slug = slug.slice(0, 60).replace(/-+$/, ""); + return slug.length > 0 ? slug : "untitled"; +} + +function findExistingArchive(outDir: string, prNumber: number): string | null { + // Idempotency: PR number is canonical key. Reuse existing PR-NNNN-*.md + // path regardless of current title (Otto-235). + const prefix = `PR-${String(prNumber).padStart(4, "0")}-`; + let entries: readonly string[]; + try { + entries = readdirSync(outDir); + } catch { + return null; + } + const matches = entries + .filter((e) => e.startsWith(prefix) && e.endsWith(".md")) + .sort(); + return matches[0] !== undefined ? join(outDir, matches[0]) : null; +} + +function yamlQuote(s: string | null | undefined): string { + // Match Python `json.dumps('' if s is None else str(s))` — gives valid + // double-quoted YAML strings. 
Python's json.dumps defaults to + // ensure_ascii=True, which escapes non-ASCII characters as \uXXXX; + // JavaScript's JSON.stringify does NOT escape non-ASCII by default, + // so for byte-equivalence we post-process the JSON-stringified value + // and replace each non-ASCII codepoint with its \uXXXX form. + const json = JSON.stringify(s ?? ""); + return json.replace(/[\u0080-\uffff]/g, (ch) => { + const code = ch.charCodeAt(0); + return `\\u${code.toString(16).padStart(4, "0")}`; + }); +} + +function nowIsoUtcSecs(): string { + // Match Python `datetime.utcnow().isoformat(timespec='seconds') + 'Z'`. + return new Date().toISOString().replace(/\.\d{3}Z$/, "Z"); +} + +function buildFrontmatter(args: { + readonly pr: PullRequest; + readonly archivedAt: string; +}): string { + const pr = args.pr; + const lines: string[] = []; + lines.push("---"); + lines.push(`pr_number: ${String(pr.number ?? 0)}`); + lines.push(`title: ${yamlQuote(pr.title ?? "untitled")}`); + lines.push(`author: ${yamlQuote(pr.author?.login ?? "unknown")}`); + lines.push(`state: ${yamlQuote(pr.state ?? "")}`); + lines.push(`created_at: ${yamlQuote(pr.createdAt ?? "")}`); + if (pr.mergedAt !== null && pr.mergedAt !== undefined && pr.mergedAt.length > 0) { + lines.push(`merged_at: ${yamlQuote(pr.mergedAt)}`); + } + if (pr.closedAt !== null && pr.closedAt !== undefined && pr.closedAt.length > 0) { + lines.push(`closed_at: ${yamlQuote(pr.closedAt)}`); + } + lines.push(`head_ref: ${yamlQuote(pr.headRefName ?? "")}`); + lines.push(`base_ref: ${yamlQuote(pr.baseRefName ?? "")}`); + lines.push(`archived_at: ${yamlQuote(args.archivedAt)}`); + lines.push(`archive_tool: ${yamlQuote("tools/pr-preservation/archive-pr.ts")}`); + lines.push("---"); + return lines.join("\n"); +} + +function rstripNewlines(s: string): string { + // Match Python `.rstrip('\n')` — only strip newlines, preserve trailing + // whitespace (markdown two-space hard-line-break). + return s.replace(/\n+$/, ""); +} + +function buildBodySections(args: { + readonly pr: PullRequest; + readonly threads: readonly ThreadNode[]; + readonly reviews: readonly ReviewNode[]; + readonly comments: readonly CommentNode[]; +}): string { + const pr = args.pr; + const sections: string[] = []; + sections.push(`# PR #${String(pr.number ?? 0)}: ${pr.title ?? "untitled"}`); + sections.push(""); + + const body = pr.body ?? ""; + if (body.trim().length > 0) { + sections.push("## PR description"); + sections.push(""); + sections.push(rstripNewlines(body)); + sections.push(""); + } + if (args.reviews.length > 0) { + sections.push("## Reviews"); + sections.push(""); + for (const r of args.reviews) { + const author = r.author?.login ?? "unknown"; + const state = r.state ?? "COMMENTED"; + const submitted = r.submittedAt ?? ""; + const bodyText = rstripNewlines(r.body ?? ""); + sections.push(`### ${state} — @${author} (${submitted})`); + sections.push(""); + sections.push(bodyText.trim().length > 0 ? bodyText : "_(no body)_"); + sections.push(""); + } + } + if (args.threads.length > 0) { + sections.push("## Review threads"); + sections.push(""); + let i = 0; + for (const t of args.threads) { + i++; + const pathRef = t.path ?? "(no path)"; + const lineNum = t.line ?? t.originalLine ?? "?"; + const resolved = t.isResolved === true ? "resolved" : "unresolved"; + sections.push(`### Thread ${String(i)}: ${pathRef}:${String(lineNum)} (${resolved})`); + sections.push(""); + const tComments = t.comments?.nodes ?? []; + for (const c of tComments) { + const author = c.author?.login ?? 
"unknown"; + const when = c.createdAt ?? ""; + const bodyText = rstripNewlines(c.body ?? ""); + sections.push(`**@${author}** (${when}):`); + sections.push(""); + sections.push(bodyText); + sections.push(""); + } + } + } + if (args.comments.length > 0) { + sections.push("## General comments"); + sections.push(""); + for (const c of args.comments) { + const author = c.author?.login ?? "unknown"; + const when = c.createdAt ?? ""; + const bodyText = rstripNewlines(c.body ?? ""); + sections.push(`### @${author} (${when})`); + sections.push(""); + sections.push(bodyText); + sections.push(""); + } + } + return sections.join("\n"); +} + +interface FenceState { + inFence: boolean; + fenceMarker: string | null; + fenceLength: number; +} + +function detectFenceMarker(rawLine: string): { marker: string | null; markerLen: number } { + // Per CommonMark §4.5: opening fence permits up to 3 spaces of indent; + // 4+ spaces or any tab in the prefix → not a fence. + let leadingSpaceCount = 0; + while (leadingSpaceCount < rawLine.length && rawLine[leadingSpaceCount] === " ") { + leadingSpaceCount++; + } + const leadingChars = rawLine.slice(0, leadingSpaceCount); + if (leadingSpaceCount > 3 || leadingChars.includes("\t")) { + return { marker: null, markerLen: 0 }; + } + const afterSpaces = rawLine.slice(leadingSpaceCount); + if (afterSpaces.startsWith("```")) { + let n = 0; + while (n < afterSpaces.length && afterSpaces[n] === "`") n++; + return { marker: "`", markerLen: n }; + } + if (afterSpaces.startsWith("~~~")) { + let n = 0; + while (n < afterSpaces.length && afterSpaces[n] === "~") n++; + return { marker: "~", markerLen: n }; + } + return { marker: null, markerLen: 0 }; +} + +function postprocessContent(content: string): string { + // Match Python post-processor: collapse 3+ blank lines to 2 and normalize + // whitespace-only lines to empty, but ONLY outside fenced code blocks + // (CommonMark §4.5). Inside fences, preserve verbatim. + const collapsed: string[] = []; + let blankRun = 0; + const state: FenceState = { inFence: false, fenceMarker: null, fenceLength: 0 }; + for (const rawLineOrig of content.split("\n")) { + let rawLine = rawLineOrig; + const { marker, markerLen } = detectFenceMarker(rawLine); + if (marker !== null) { + if (!state.inFence) { + // Opening fence. + state.inFence = true; + state.fenceMarker = marker; + state.fenceLength = markerLen; + blankRun = 0; + collapsed.push(rawLine); + continue; + } + if (marker === state.fenceMarker && markerLen >= state.fenceLength) { + // Closing fence. + state.inFence = false; + state.fenceMarker = null; + state.fenceLength = 0; + blankRun = 0; + collapsed.push(rawLine); + continue; + } + // Fence-shaped line that isn't a valid closer — fall through. + } + if (state.inFence) { + collapsed.push(rawLine); + continue; + } + // Outside fences: normalize whitespace-only lines to empty without + // touching inline trailing whitespace on lines with text. 
+ if (rawLine.length > 0 && rawLine.trim().length === 0) { + rawLine = ""; + } + if (rawLine === "") blankRun++; + else blankRun = 0; + if (blankRun <= 2) collapsed.push(rawLine); + } + return collapsed.join("\n").replace(/\n+$/, "") + "\n"; +} + +function formatArchive(args: { + readonly fetched: FetchedPr; + readonly archivedAt: string; +}): string { + const frontmatter = buildFrontmatter({ + pr: args.fetched.pr, + archivedAt: args.archivedAt, + }); + const sections = buildBodySections({ + pr: args.fetched.pr, + threads: args.fetched.threads, + reviews: args.fetched.reviews, + comments: args.fetched.comments, + }); + // Match Python `'\n'.join(lines).rstrip('\n') + '\n'` then post-processor + // (which does its own final rstrip+newline). + const initial = `${frontmatter}\n\n${sections}`.replace(/\n+$/, "") + "\n"; + return postprocessContent(initial); +} + +interface RepoOk { + readonly owner: string; + readonly name: string; + readonly host: string; +} + +interface RepoError { + readonly error: string; + readonly exitCode: 1 | 2; +} + +function setupRepo(): { repoRoot: string; repo: RepoOk } | RepoError { + const root = gitRevParseShowToplevel(); + if (typeof root !== "string") return root; + const nwo = detectRepoNwo(); + if (typeof nwo !== "string") return nwo; + const parsed = parseRepoNwo(nwo); + if ("error" in parsed) return parsed; + // Reject embedded slashes inside owner/repo (defence in depth — the case + // patterns above should already prevent this, but a path-injection here + // would land archive output outside docs/pr-discussions/). + if (parsed.owner.includes("/")) { + return { + error: `error: REPO_OWNER cannot contain a slash (got: '${parsed.owner}')`, + exitCode: 1, + }; + } + if (parsed.name.includes("/")) { + return { + error: `error: REPO_NAME cannot contain a slash (got: '${parsed.name}')`, + exitCode: 1, + }; + } + return { repoRoot: root, repo: parsed }; +} + +export function main(argv: readonly string[]): number { + const args = parseArgs(argv); + if ("error" in args) { + process.stderr.write(`${args.error}\n`); + return args.exitCode; + } + const setup = setupRepo(); + if ("error" in setup) { + process.stderr.write(`${setup.error}\n`); + return setup.exitCode; + } + const outDir = join(setup.repoRoot, "docs", "pr-discussions"); + mkdirSync(outDir, { recursive: true }); + + const fetched = fetchPullRequest({ + host: setup.repo.host, + owner: setup.repo.owner, + name: setup.repo.name, + prNumber: args.pr, + }); + if ("error" in fetched) { + process.stderr.write(`${fetched.error}\n`); + return fetched.exitCode; + } + + const archivedAt = nowIsoUtcSecs(); + const content = formatArchive({ fetched, archivedAt }); + + const existing = findExistingArchive(outDir, args.pr); + const path = + existing ?? + join(outDir, `PR-${String(args.pr).padStart(4, "0")}-${makeSlug(fetched.pr.title ?? "untitled")}.md`); + + writeFileSync(path, content); + process.stdout.write( + `wrote ${path} (${String(content.length)} bytes, ${String(fetched.threads.length)} threads, ${String(fetched.reviews.length)} reviews, ${String(fetched.comments.length)} comments)\n`, + ); + return 0; +} + +if (import.meta.main) { + process.exit(main(process.argv.slice(2))); +}
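A minimal smoke-test sketch for the argument-validation paths sampled in the equivalence audit (hypothetical, not part of this slice; it assumes Bun's built-in `bun:test` runner and relies on `main` staying exported):

```ts
// tools/pr-preservation/archive-pr.test.ts — hypothetical follow-up sketch, not in this PR.
// Pins the exit-code-1 argument-validation behaviour described in the slice 21 audit.
import { describe, expect, test } from "bun:test";
import { main } from "./archive-pr";

describe("archive-pr argument validation", () => {
  test("no args returns exit code 1", () => {
    expect(main([])).toBe(1);
  });
  test("non-numeric PR number returns exit code 1", () => {
    expect(main(["abc"])).toBe(1);
  });
});
```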