diff --git a/docs/backlog/P1/B-0170.1-semantic-equivalence-drift-checker.md b/docs/backlog/P1/B-0170.1-semantic-equivalence-drift-checker.md new file mode 100644 index 0000000000..ecefc847cc --- /dev/null +++ b/docs/backlog/P1/B-0170.1-semantic-equivalence-drift-checker.md @@ -0,0 +1,39 @@ +--- +id: B-0170.1 +priority: P1 +status: open +title: "Substrate-claim-checker - semantic-equivalence-drift checker" +created: 2026-05-28 +last_updated: 2026-05-28 +parent: B-0170 +depends_on: [] +classification: buildable-now +decomposition: atomic +owners: [lior] +type: tooling +--- + +# B-0170.1 — Semantic-equivalence-drift checker + +This task implements the "semantic-equivalence-drift" checker, as specified in the parent task B-0170. + +## Scope + +This checker is responsible for detecting claims of semantic equivalence between commands in documentation and verifying them. For example, if a document states that `ll` is an alias for `ls -l`, this checker should be able to validate that claim. + +### V0.1 (This task) + +The initial version of this tool will only *detect* claims of semantic equivalence. It will scan markdown files for patterns like: + +- `... is equivalent to ...` +- `... is an alias for ...` +- `... is the same as ...` + +It will then report the file and line number where these claims are made. Verification of the claims is out of scope for this initial version. + +## Acceptance Criteria + +- A new script `tools/substrate-claim-checker/check-semantic-equivalence.ts` is created. +- The script can be run from the command line. +- The script scans markdown files for claims of semantic equivalence. +- The script outputs a list of found claims with their location. 
// tools/substrate-claim-checker/check-semantic-equivalence.ts
// check-semantic-equivalence.ts -- finds claims of semantic equivalence in markdown files.
// Part of B-0170.1.
//
// Written to be executed with Bun; in the standalone script file the very
// first line is the shebang `#!/usr/bin/env bun`.
//
// Usage: check-semantic-equivalence.ts [directory]   (defaults to the cwd)

import fs from 'fs';
import path from 'path';

/**
 * Matches a claim of the form "`lhs` <phrase> `rhs`" where both operands are
 * wrapped in backticks and <phrase> is one of the three supported wordings.
 * Case-insensitive and global so every claim on a line is reported.
 */
const CLAIM_REGEX = /`([^`]+)`\s+(is equivalent to|is an alias for|is the same as)\s+`([^`]+)`/gi;
// `upstreams` excludes references/upstreams/ — 85+ full clones of external
// projects; walking it takes minutes and returns mostly noise (per
// .claude/rules/references-upstreams-not-our-code-search-excludes.md).
const IGNORE_DIRS = ['node_modules', '.git', '.vscode', '.idea', 'dist', 'build', 'upstreams'];
// File-name suffixes (lower case) that are scanned for claims.
const INCLUDE_EXTS = ['.md', '.mdx'];

/** One detected claim and where it was found. */
interface Match {
  /** Path of the file containing the claim, as passed to the scanner. */
  file: string;
  /** 1-based line number of the claim within the file. */
  line: number;
  /** The full matched text, including the backticked operands. */
  claim: string;
}

/** Turns any caught value into a printable message without assuming it is an Error. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Finds every semantic-equivalence claim in a block of text.
 *
 * Pure helper (no I/O) so the matching rules can be exercised directly.
 *
 * @param content  Full text to scan; lines are separated by `\n`.
 * @param filePath Value recorded in each result's `file` field.
 * @returns Claims in document order; several claims on one line are all reported.
 */
function findClaimsInText(content: string, filePath: string): Match[] {
  const found: Match[] = [];
  content.split('\n').forEach((text, index) => {
    // matchAll works on a private copy of the regex, so the shared
    // CLAIM_REGEX never carries `lastIndex` state between lines or files.
    for (const hit of text.matchAll(CLAIM_REGEX)) {
      found.push({ file: filePath, line: index + 1, claim: hit[0] });
    }
  });
  return found;
}

/**
 * Scans a single file for claims.
 *
 * @param filePath Path of the file to scan.
 * @returns The claims found; an empty list when the file is not markdown
 *          (by suffix, compared case-insensitively) or cannot be read.
 */
function searchInFile(filePath: string): Match[] {
  const lowerPath = filePath.toLowerCase();
  if (!INCLUDE_EXTS.some(ext => lowerPath.endsWith(ext))) {
    return [];
  }

  let content: string;
  try {
    content = fs.readFileSync(filePath, 'utf-8');
  } catch (error: unknown) {
    // Surface read failures to stderr so silently-skipped files are visible
    // (a swallowed read produces a false negative — "no claims" when the file
    // was actually unreadable).
    console.error(`warn: could not read ${filePath}: ${describeError(error)}`);
    return [];
  }

  return findClaimsInText(content, filePath);
}

/**
 * Lists a directory, reporting (rather than throwing on) read failures so one
 * unreadable directory does not abort the whole scan.
 */
function readDirEntries(dirPath: string) {
  try {
    return fs.readdirSync(dirPath, { withFileTypes: true });
  } catch (error: unknown) {
    console.error(`warn: could not read directory ${dirPath}: ${describeError(error)}`);
    return [];
  }
}

/**
 * Recursively scans a directory tree for claims.
 *
 * Directories named in IGNORE_DIRS are skipped. Entries that are neither a
 * regular file nor a directory (for example symbolic links) are not followed.
 * Entries are visited in name order so the report is stable across platforms.
 *
 * @param dirPath Directory to scan.
 * @returns All claims found beneath `dirPath`; an unreadable directory is
 *          reported on stderr and contributes no results.
 */
function searchInDirectory(dirPath: string): Match[] {
  const entries = readDirEntries(dirPath);
  // Plain code-unit comparison: deterministic and independent of locale.
  entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

  const allMatches: Match[] = [];
  for (const entry of entries) {
    const fullPath = path.join(dirPath, entry.name);

    if (entry.isDirectory()) {
      if (IGNORE_DIRS.includes(entry.name)) {
        continue;
      }
      for (const found of searchInDirectory(fullPath)) {
        allMatches.push(found);
      }
    } else if (entry.isFile()) {
      for (const found of searchInFile(fullPath)) {
        allMatches.push(found);
      }
    }
  }

  return allMatches;
}

/**
 * CLI entry point: scans the directory given as the first argument (or the
 * current working directory) and prints every claim with its location.
 * Sets a non-zero exit code when the target is not a usable directory.
 */
function main(): void {
  // `||` (not `??`) on purpose: an empty-string argument also falls back to the cwd.
  const searchDir = process.argv[2] || process.cwd();

  let isDirectory = false;
  try {
    isDirectory = fs.statSync(searchDir).isDirectory();
  } catch {
    isDirectory = false;
  }
  if (!isDirectory) {
    // Fail loudly: reporting "no claims" for a path that was never scanned
    // would be a false negative.
    console.error(`error: ${searchDir} is not a readable directory`);
    process.exitCode = 1;
    return;
  }

  console.log(`Searching for semantic equivalence claims in ${searchDir}...\n`);

  const allMatches = searchInDirectory(searchDir);

  if (allMatches.length === 0) {
    console.log('No semantic equivalence claims found.');
    return;
  }

  console.log(`Found ${allMatches.length} potential claims:\n`);
  for (const match of allMatches) {
    console.log(`- ${match.file}:${match.line}`);
    console.log(`  > ${match.claim}`);
  }
}

// Bun sets import.meta.main only for the module that was run directly, so
// importing this file (e.g. from a test) does not trigger a scan.
if (import.meta.main) {
  main();
}