diff --git a/docs/backlog/P1/B-0272-atari-rom-canonical-naming-tosec-lookup-2026-05-08.md b/docs/backlog/P1/B-0272-atari-rom-canonical-naming-tosec-lookup-2026-05-08.md
index 232e7d5f9..87ac96332 100644
--- a/docs/backlog/P1/B-0272-atari-rom-canonical-naming-tosec-lookup-2026-05-08.md
+++ b/docs/backlog/P1/B-0272-atari-rom-canonical-naming-tosec-lookup-2026-05-08.md
@@ -17,6 +17,16 @@ type: friction-reducer
Hash each ROM file, look up in TOSEC/No-Intro DAT files,
rename to canonical form. TS script at tools/roms/.
+## Pre-start checklist
+
+- [x] Prior-art search: checked `tools/roms/` (empty), grepped for
+ TOSEC/No-Intro/canonicalize across repo (no existing TS tooling),
+ read parent B-0083 algorithm section and tooling design.
+- [x] Dependency walk: parent B-0083 (decomposed umbrella),
+ sibling B-0273 (depends on this item), no other deps.
+- [x] Datfile format: Logiqx XML used by both TOSEC and No-Intro;
+  the `sha1` attribute of each `<rom>` element is the match surface.
+
## Acceptance criteria
- Script at tools/roms/canonicalize.ts
diff --git a/tools/roms/canonicalize.test.ts b/tools/roms/canonicalize.test.ts
new file mode 100644
index 000000000..f7472d63f
--- /dev/null
+++ b/tools/roms/canonicalize.test.ts
@@ -0,0 +1,181 @@
+import { describe, expect, test } from "bun:test";
+import { existsSync, mkdtempSync, writeFileSync, readFileSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import { parseDatfile, hashFileSha1, scanRomFiles, matchAndReport } from "./canonicalize.ts";
+
+// Minimal Logiqx-style datfile with two <rom> entries. The SHA-1 values are
+// those of an empty file (Combat) and of the ASCII text "hello" (Adventure),
+// so tests can create matching content without real ROM dumps. The Combat
+// digest is written in upper case on purpose so that the "keys by lowercase
+// sha1" test actually exercises case normalisation.
+const FIXTURE_DATFILE = `<?xml version="1.0"?>
+<datafile>
+  <game name="Combat (1977)(Atari)">
+    <description>Combat (1977)(Atari)</description>
+    <rom name="Combat (1977)(Atari).a26" size="0" sha1="DA39A3EE5E6B4B0D3255BFEF95601890AFD80709"/>
+  </game>
+  <game name="Adventure (1980)(Atari)">
+    <description>Adventure (1980)(Atari)</description>
+    <rom name="Adventure (1980)(Atari).a26" size="5" sha1="aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d"/>
+  </game>
+</datafile>
+`;
+
+// Unit tests for parseDatfile: entry extraction, sha1 key normalisation,
+// name preservation, entity decoding, and tolerance of unusable input.
+describe("parseDatfile", () => {
+  test("extracts rom entries from Logiqx XML", () => {
+    const lookup = parseDatfile(FIXTURE_DATFILE);
+    expect(lookup.size).toBe(2);
+  });
+
+  test("keys by lowercase sha1", () => {
+    const lookup = parseDatfile(FIXTURE_DATFILE);
+    expect(lookup.has("da39a3ee5e6b4b0d3255bfef95601890afd80709")).toBe(true);
+    expect(lookup.has("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d")).toBe(true);
+  });
+
+  test("preserves canonical name from datfile", () => {
+    const lookup = parseDatfile(FIXTURE_DATFILE);
+    const entry = lookup.get("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d");
+    expect(entry?.name).toBe("Adventure (1980)(Atari).a26");
+  });
+
+  test("decodes XML entities in canonical names", () => {
+    // &amp; and &quot; in the attribute must come back as literal characters.
+    const xml = `<rom name="Fish &amp; Chips &quot;Demo&quot;.bin" size="5" sha1="aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d"/>`;
+    const lookup = parseDatfile(xml);
+    const entry = lookup.get("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d");
+    expect(entry?.name).toBe('Fish & Chips "Demo".bin');
+  });
+
+  test("returns empty map for empty input", () => {
+    const lookup = parseDatfile("");
+    expect(lookup.size).toBe(0);
+  });
+
+  test("handles datfile with no sha1 attributes gracefully", () => {
+    // A <rom> that only carries a CRC cannot be keyed, so it must be skipped.
+    const xml = `<datafile><game name="Test"><rom name="test.bin" size="100" crc="12345678"/></game></datafile>`;
+    const lookup = parseDatfile(xml);
+    expect(lookup.size).toBe(0);
+  });
+});
+
+// Unit tests for hashFileSha1 against two well-known SHA-1 digests.
+describe("hashFileSha1", () => {
+  // Writes `content` to a new file in a fresh temp directory and hashes it.
+  const digestOf = (fileName: string, content: string): string => {
+    const path = join(mkdtempSync(join(tmpdir(), "rom-test-")), fileName);
+    writeFileSync(path, content);
+    return hashFileSha1(path);
+  };
+
+  test("computes correct SHA1 for known content", () => {
+    expect(digestOf("test.bin", "hello")).toBe(
+      "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d",
+    );
+  });
+
+  test("computes correct SHA1 for empty file", () => {
+    expect(digestOf("empty.bin", "")).toBe(
+      "da39a3ee5e6b4b0d3255bfef95601890afd80709",
+    );
+  });
+});
+
+// Unit tests for scanRomFiles: known ROM extensions are returned, others are not.
+describe("scanRomFiles", () => {
+  // Builds a temp directory holding one file per [name, content] pair.
+  const dirWith = (entries: ReadonlyArray<readonly [string, string]>): string => {
+    const root = mkdtempSync(join(tmpdir(), "rom-scan-"));
+    for (const [name, content] of entries) {
+      writeFileSync(join(root, name), content);
+    }
+    return root;
+  };
+
+  test("finds .bin and .a26 files", () => {
+    const found = scanRomFiles(
+      dirWith([
+        ["game.bin", "data"],
+        ["game2.a26", "data"],
+        ["README.md", "docs"],
+      ]),
+    );
+
+    expect(found.length).toBe(2);
+    for (const expectedName of ["game.bin", "game2.a26"]) {
+      expect(found.some((f) => f.endsWith(expectedName))).toBe(true);
+    }
+  });
+
+  test("ignores non-ROM files", () => {
+    const found = scanRomFiles(
+      dirWith([
+        ["notes.txt", "data"],
+        ["save.sav", "data"],
+      ]),
+    );
+
+    expect(found.length).toBe(0);
+  });
+});
+
+// Unit tests for matchAndReport: matching, dry-run vs --apply, and refusal to
+// rename to names that could escape the ROM directory.
+describe("matchAndReport", () => {
+  // SHA-1 of the ASCII text "hello"; every matching test file uses that content.
+  const HELLO_SHA1 = "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d";
+
+  // Creates a fresh temp directory containing a single file.
+  const makeRom = (
+    prefix: string,
+    name: string,
+    content: string,
+  ): { tmp: string; file: string } => {
+    const tmp = mkdtempSync(join(tmpdir(), prefix));
+    const file = join(tmp, name);
+    writeFileSync(file, content);
+    return { tmp, file };
+  };
+
+  // One-entry datfile mapping the "hello" digest to an arbitrary rom name.
+  const datfileNaming = (romName: string): string =>
+    `<datafile><game name="x"><rom name="${romName}" size="5" sha1="${HELLO_SHA1}"/></game></datafile>`;
+
+  test("matches file by SHA1 and reports canonical name", () => {
+    const { file } = makeRom("rom-match-", "hello.bin", "hello");
+
+    const lookup = parseDatfile(FIXTURE_DATFILE);
+    const results = matchAndReport(lookup, [file], false);
+    expect(results.length).toBe(1);
+    expect(results[0]?.matched).toBe(true);
+    expect(results[0]?.canonicalName).toBe("Adventure (1980)(Atari).a26");
+    expect(results[0]?.renamed).toBe(false);
+  });
+
+  test("reports unmatched files", () => {
+    const { file } = makeRom(
+      "rom-nomatch-",
+      "unknown.bin",
+      "this content has no datfile entry",
+    );
+
+    const lookup = parseDatfile(FIXTURE_DATFILE);
+    const results = matchAndReport(lookup, [file], false);
+    expect(results.length).toBe(1);
+    expect(results[0]?.matched).toBe(false);
+    expect(results[0]?.canonicalName).toBeNull();
+  });
+
+  test("renames file when --apply is true", () => {
+    const { tmp, file } = makeRom("rom-rename-", "wrong-name.bin", "hello");
+
+    const lookup = parseDatfile(FIXTURE_DATFILE);
+    const results = matchAndReport(lookup, [file], true);
+    expect(results[0]?.renamed).toBe(true);
+
+    // The content must now live under the canonical name, and only there.
+    const renamedPath = join(tmp, "Adventure (1980)(Atari).a26");
+    expect(readFileSync(renamedPath, "utf8")).toBe("hello");
+    expect(existsSync(file)).toBe(false);
+  });
+
+  test("does not apply unsafe canonical names with path separators", () => {
+    const { tmp, file } = makeRom("rom-unsafe-name-", "wrong-name.bin", "hello");
+
+    const lookup = parseDatfile(datfileNaming("../escaped.a26"));
+    const results = matchAndReport(lookup, [file], true);
+
+    expect(results[0]?.matched).toBe(true);
+    expect(results[0]?.renamed).toBe(false);
+    expect(existsSync(file)).toBe(true);
+    expect(existsSync(join(tmp, "..", "escaped.a26"))).toBe(false);
+  });
+
+  test("does not apply unsafe canonical names with Windows separators", () => {
+    const { file } = makeRom("rom-unsafe-win-name-", "wrong-name.bin", "hello");
+
+    // "..\\escaped.a26" is the literal text ..\escaped.a26 in the datfile.
+    const lookup = parseDatfile(datfileNaming("..\\escaped.a26"));
+    const results = matchAndReport(lookup, [file], true);
+
+    expect(results[0]?.matched).toBe(true);
+    expect(results[0]?.renamed).toBe(false);
+    expect(existsSync(file)).toBe(true);
+  });
+
+  test("does not rename when file already has canonical name", () => {
+    const { file } = makeRom(
+      "rom-already-",
+      "Adventure (1980)(Atari).a26",
+      "hello",
+    );
+
+    const lookup = parseDatfile(FIXTURE_DATFILE);
+    const results = matchAndReport(lookup, [file], true);
+    expect(results[0]?.matched).toBe(true);
+    expect(results[0]?.renamed).toBe(false);
+  });
+});
diff --git a/tools/roms/canonicalize.ts b/tools/roms/canonicalize.ts
new file mode 100644
index 000000000..82e9b7b3d
--- /dev/null
+++ b/tools/roms/canonicalize.ts
@@ -0,0 +1,320 @@
+#!/usr/bin/env bun
+// canonicalize.ts -- hash ROM files and match against a TOSEC / No-Intro
+// datfile (Logiqx XML format). Smallest safe slice of B-0272.
+//
+// Usage:
+//   bun tools/roms/canonicalize.ts --datfile <path> --dir <path>
+//   bun tools/roms/canonicalize.ts --datfile <path> --dir <path> --apply
+//
+// Output (default dry-run): JSON array of
+// { file, sha1, matched, canonicalName, renamed }.
+// --apply: renames matched files to their canonical names.
+
+import { createHash } from "node:crypto";
+import {
+  readdirSync,
+  readFileSync,
+  renameSync,
+  existsSync,
+} from "node:fs";
+import { basename, dirname, extname, join } from "node:path";
+
+// --- Datfile parsing (Logiqx XML) ---
+
+/** One rom record from a datfile, as stored in the map built by `parseDatfile`. */
+interface DatEntry {
+  /** Value of the `name` attribute after XML entity decoding. */
+  readonly name: string;
+  /** `size` attribute parsed as a base-10 integer; 0 when the attribute is absent. */
+  readonly size: number;
+  /** Lowercased `sha1` attribute; also the key the entry is stored under. */
+  readonly sha1: string;
+  /** Lowercased `md5` attribute, or "" when the attribute is absent. */
+  readonly md5: string;
+  /** Lowercased `crc` attribute, or "" when the attribute is absent. */
+  readonly crc: string;
+}
+
+/**
+ * File extensions (lowercase, with the leading dot) that `scanRomFiles`
+ * accepts as ROM images.
+ */
+const ROM_EXTENSIONS = new Set(
+  (
+    ".bin .a26 .rom .int .vec .gg .sms .gen .smc .sfc .nes .gb .gbc " +
+    ".gba .n64 .z64 .v64 .pce .ngp .ngc .ws .wsc .sg .32x .col"
+  ).split(" "),
+);
+
+/** Replacement text for the five predefined XML entities. */
+const PREDEFINED_XML_ENTITIES: ReadonlyMap<string, string> = new Map([
+  ["amp", "&"],
+  ["lt", "<"],
+  ["gt", ">"],
+  ["quot", '"'],
+  ["apos", "'"],
+]);
+
+/**
+ * Decodes XML entity and character references in an attribute value.
+ *
+ * Handles the predefined entities (`&amp;`, `&lt;`, `&gt;`, `&quot;`,
+ * `&apos;`) and numeric references in decimal (`&#65;`) or hexadecimal
+ * (`&#x41;`) form. Decoding is a single pass, so `&amp;lt;` yields the text
+ * `&lt;` rather than `<`. Anything else that merely looks like an entity is
+ * left as written.
+ *
+ * @param value Raw attribute text as it appears between the quotes.
+ * @returns The decoded text. Numeric references above U+10FFFF are returned
+ *   unchanged instead of throwing, so one malformed datfile entry cannot
+ *   abort the whole parse.
+ */
+function decodeXmlAttributeValue(value: string): string {
+  return value.replace(
+    /&(#x[0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g,
+    (reference: string, entity: string): string => {
+      const predefined = PREDEFINED_XML_ENTITIES.get(entity);
+      if (predefined !== undefined) {
+        return predefined;
+      }
+      // Only numeric references remain: "#x<hex>" or "#<decimal>".
+      const codePoint = entity.startsWith("#x")
+        ? parseInt(entity.slice(2), 16)
+        : parseInt(entity.slice(1), 10);
+      // String.fromCodePoint throws RangeError past the last Unicode code
+      // point; the digits-only pattern already rules out negatives and NaN.
+      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : reference;
+    },
+  );
+}
+
+/**
+ * Reports whether a datfile-supplied name can be used as a plain file name.
+ *
+ * Rejects the empty string, the directory aliases "." and "..", and any name
+ * containing a NUL character, a forward slash, or a backslash.
+ */
+function isSafeCanonicalName(name: string): boolean {
+  if (name === "" || name === "." || name === "..") {
+    return false;
+  }
+  const forbidden = ["\0", "/", "\\"];
+  return forbidden.every((ch) => !name.includes(ch));
+}
+
+/**
+ * Builds a SHA-1 lookup table from the text of a Logiqx-style datfile.
+ *
+ * This is a lightweight regex scan rather than a full XML parse: it finds
+ * every `<rom ...>` opening tag, reads its `key="value"` / `key='value'`
+ * attributes, and decodes XML entities in the values.
+ *
+ * @param xml Full text of the datfile.
+ * @returns Map from lowercase SHA-1 hex digest to the entry. Tags without a
+ *   non-empty `sha1` or `name` attribute are skipped; if the same digest
+ *   appears more than once, the last occurrence wins.
+ */
+export function parseDatfile(xml: string): ReadonlyMap<string, DatEntry> {
+  const lookup = new Map<string, DatEntry>();
+  // Group 1 is the attribute text of a <rom> tag, self-closing or not.
+  // NOTE(review): a literal ">" inside an attribute value would end the
+  // match early; datfiles are expected to write it as &gt;.
+  const romTagRe = /<rom\s+([^>]*?)\/?\s*>/g;
+  // Group 1 is the attribute name; group 2 or 3 is the quoted value.
+  const attrRe = /(\w+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
+
+  for (const tagMatch of xml.matchAll(romTagRe)) {
+    // A Map (not a plain object) so attribute names cannot collide with
+    // Object.prototype members.
+    const attrs = new Map<string, string>();
+    for (const attrMatch of (tagMatch[1] ?? "").matchAll(attrRe)) {
+      const key = attrMatch[1];
+      const raw = attrMatch[2] ?? attrMatch[3];
+      if (key !== undefined && raw !== undefined) {
+        attrs.set(key, decodeXmlAttributeValue(raw));
+      }
+    }
+
+    const sha1 = attrs.get("sha1")?.toLowerCase();
+    const name = attrs.get("name");
+    if (sha1 && name) {
+      lookup.set(sha1, {
+        name,
+        size: parseInt(attrs.get("size") ?? "0", 10),
+        sha1,
+        md5: (attrs.get("md5") ?? "").toLowerCase(),
+        crc: (attrs.get("crc") ?? "").toLowerCase(),
+      });
+    }
+  }
+  return lookup;
+}
+
+// --- File hashing ---
+
+/**
+ * Computes the SHA-1 digest of a file's contents.
+ *
+ * The file is read into memory in one synchronous call before hashing.
+ *
+ * @param path Path of the file to hash.
+ * @returns The digest as a hex string.
+ */
+export function hashFileSha1(path: string): string {
+  const hasher = createHash("sha1");
+  hasher.update(readFileSync(path));
+  return hasher.digest("hex");
+}
+
+// --- Directory scanning ---
+
+/**
+ * Lists candidate ROM files directly inside a directory (no recursion).
+ *
+ * A regular file qualifies when its lowercased extension is in
+ * `ROM_EXTENSIONS`, or when `extname` reports no extension at all.
+ *
+ * @param dir Directory to scan.
+ * @returns Paths formed as `join(dir, name)`, in default `sort()` order.
+ */
+export function scanRomFiles(dir: string): readonly string[] {
+  return readdirSync(dir, { withFileTypes: true })
+    .filter((entry) => entry.isFile())
+    .map((entry) => entry.name)
+    .filter((name) => {
+      const ext = extname(name).toLowerCase();
+      return ext === "" || ROM_EXTENSIONS.has(ext);
+    })
+    .map((name) => join(dir, name))
+    .sort();
+}
+
+// --- Match + rename ---
+
+/** Outcome for one input file, as returned by `matchAndReport`. */
+interface MatchResult {
+  /** Path of the file as it was passed in (the pre-rename path). */
+  readonly file: string;
+  /** SHA-1 hex digest computed from the file's contents. */
+  readonly sha1: string;
+  /** True when the datfile lookup has an entry for `sha1`. */
+  readonly matched: boolean;
+  /** Name from the matching datfile entry, or null when unmatched. */
+  readonly canonicalName: string | null;
+  /** True only when this run actually renamed the file. */
+  readonly renamed: boolean;
+}
+
+/**
+ * Tries to rename one matched file to its canonical name.
+ *
+ * Writes a "skip: ..." line to stderr and returns false when the name is
+ * unsafe or the target path is already taken; never overwrites on purpose.
+ */
+function renameToCanonical(
+  filePath: string,
+  canonicalName: string,
+  claimedTargets: Set<string>,
+): boolean {
+  if (!isSafeCanonicalName(canonicalName)) {
+    process.stderr.write(
+      `skip: unsafe canonical name from datfile: ${canonicalName}\n`,
+    );
+    return false;
+  }
+  const target = join(dirname(filePath), canonicalName);
+  if (claimedTargets.has(target) || existsSync(target)) {
+    process.stderr.write(`skip: target already exists: ${target}\n`);
+    return false;
+  }
+  renameSync(filePath, target);
+  claimedTargets.add(target);
+  return true;
+}
+
+/**
+ * Hashes each file, looks the digest up in the datfile table, and optionally
+ * renames matched files to their canonical names.
+ *
+ * @param datLookup Map from lowercase SHA-1 hex digest to datfile entry.
+ * @param romFiles Paths of the files to examine, processed in order.
+ * @param apply When false (dry run) nothing on disk is changed. When true, a
+ *   matched file whose base name differs from the canonical name is renamed
+ *   within its own directory, unless the name is unsafe or the target exists.
+ * @returns One result per input file, in input order.
+ * @throws Whatever `readFileSync` / `renameSync` throw; such errors are not
+ *   caught here.
+ */
+export function matchAndReport(
+  datLookup: ReadonlyMap<string, DatEntry>,
+  romFiles: readonly string[],
+  apply: boolean,
+): readonly MatchResult[] {
+  const results: MatchResult[] = [];
+  // Full target paths taken by renames in this run. Keyed by path rather than
+  // bare name so that inputs from different directories do not block each
+  // other; it backs up the existsSync check inside renameToCanonical.
+  const claimedTargets = new Set<string>();
+
+  for (const filePath of romFiles) {
+    const sha1 = hashFileSha1(filePath);
+    const entry = datLookup.get(sha1);
+
+    if (!entry) {
+      results.push({
+        file: filePath,
+        sha1,
+        matched: false,
+        canonicalName: null,
+        renamed: false,
+      });
+      continue;
+    }
+
+    const canonicalName = entry.name;
+    const needsRename = apply && basename(filePath) !== canonicalName;
+    const renamed =
+      needsRename && renameToCanonical(filePath, canonicalName, claimedTargets);
+
+    results.push({
+      file: filePath,
+      sha1,
+      matched: true,
+      canonicalName,
+      renamed,
+    });
+  }
+
+  return results;
+}
+
+// --- CLI ---
+
+/** Parsed command-line options for the canonicalize CLI. */
+interface Args {
+  /** Path given with --datfile / -d. */
+  readonly datfile: string;
+  /** Directory given with --dir. */
+  readonly dir: string;
+  /** True when --apply was passed; otherwise the run is a dry-run report. */
+  readonly apply: boolean;
+}
+
+/** Help text printed for --help / -h. */
+const USAGE_TEXT =
+  "Usage: canonicalize.ts --datfile <path> --dir <path> [--apply]\n\n" +
+  "  --datfile, -d <path>  Path to a TOSEC / No-Intro datfile (Logiqx XML).\n" +
+  "  --dir <path>          Directory containing ROM files to match.\n" +
+  "  --apply               Actually rename files (default: dry-run report).\n";
+
+/**
+ * Parses CLI arguments (without the runtime and script path).
+ *
+ * Exits the process instead of returning when it cannot produce a valid
+ * result: status 0 after printing help, status 64 for an unknown argument or
+ * a missing --datfile / --dir.
+ *
+ * @param argv Arguments such as `process.argv.slice(2)`.
+ * @returns The parsed options.
+ */
+function parseArgs(argv: readonly string[]): Args {
+  let datfile: string | undefined;
+  let dir: string | undefined;
+  let apply = false;
+
+  for (let i = 0; i < argv.length; i++) {
+    const arg = argv[i];
+    if (arg === "--datfile" || arg === "-d") {
+      // The value is the next argument; ++i consumes it.
+      datfile = argv[++i];
+    } else if (arg === "--dir") {
+      dir = argv[++i];
+    } else if (arg === "--apply") {
+      apply = true;
+    } else if (arg === "--help" || arg === "-h") {
+      process.stdout.write(USAGE_TEXT);
+      process.exit(0);
+    } else {
+      process.stderr.write(`unknown arg: ${arg}\n`);
+      process.exit(64);
+    }
+  }
+
+  // A flag given as the last argument leaves its value undefined, so it is
+  // reported here as missing.
+  if (!datfile) {
+    process.stderr.write("--datfile is required\n");
+    process.exit(64);
+  }
+  if (!dir) {
+    process.stderr.write("--dir is required\n");
+    process.exit(64);
+  }
+  return { datfile, dir, apply };
+}
+
+/**
+ * CLI entry point: loads the datfile, scans the directory, matches (and with
+ * --apply renames) the ROM files, then prints the results.
+ *
+ * The JSON result array goes to stdout; progress lines and the summary go to
+ * stderr. Argument errors are handled inside `parseArgs`, which exits the
+ * process itself.
+ *
+ * @param argv Command-line arguments without the runtime and script path.
+ * @returns Exit status: 1 when the datfile or directory does not exist,
+ *   otherwise 0.
+ */
+export function main(argv: readonly string[]): number {
+  const { datfile, dir, apply } = parseArgs(argv);
+
+  if (!existsSync(datfile)) {
+    process.stderr.write(`datfile not found: ${datfile}\n`);
+    return 1;
+  }
+  if (!existsSync(dir)) {
+    process.stderr.write(`directory not found: ${dir}\n`);
+    return 1;
+  }
+
+  const lookup = parseDatfile(readFileSync(datfile, "utf8"));
+  process.stderr.write(`datfile: ${lookup.size} entries loaded\n`);
+
+  const romFiles = scanRomFiles(dir);
+  process.stderr.write(`directory: ${romFiles.length} ROM files found\n`);
+
+  // Nothing to hash: emit an empty report and skip the summary.
+  if (romFiles.length === 0) {
+    process.stdout.write("[]\n");
+    return 0;
+  }
+
+  const results = matchAndReport(lookup, romFiles, apply);
+
+  // Tally in one pass; unmatched is whatever is left over.
+  let matchedCount = 0;
+  let renamedCount = 0;
+  for (const result of results) {
+    if (result.matched) matchedCount += 1;
+    if (result.renamed) renamedCount += 1;
+  }
+  const unmatchedCount = results.length - matchedCount;
+
+  process.stdout.write(`${JSON.stringify(results, null, 2)}\n`);
+  process.stderr.write(
+    `\nsummary: ${matchedCount} matched, ${unmatchedCount} unmatched, ${renamedCount} renamed\n`,
+  );
+
+  return 0;
+}
+
+// Run the CLI only when `import.meta.main` is truthy, so importing this module
+// (as the tests do) does not trigger argument parsing or process.exit.
+if (import.meta.main) {
+  const exitCode = main(process.argv.slice(2));
+  process.exit(exitCode);
+}