From 8765e70362c703b2f5d88ca0bd6f8f79bec1ba13 Mon Sep 17 00:00:00 2001 From: Lior Date: Thu, 28 May 2026 12:59:21 -0400 Subject: [PATCH 1/3] feat(tools): Add semantic equivalence claim checker (B-0170.1) This change implements the first version of the semantic equivalence claim checker, a sub-task of B-0170. - Creates the backlog item 'B-0170.1'. - Adds the script 'tools/substrate-claim-checker/check-semantic-equivalence.ts' to detect claims of semantic equivalence in markdown files. --- ...70.1-semantic-equivalence-drift-checker.md | 38 +++++++++ .../check-semantic-equivalence.ts | 84 +++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 docs/backlog/P1/B-0170.1-semantic-equivalence-drift-checker.md create mode 100644 tools/substrate-claim-checker/check-semantic-equivalence.ts diff --git a/docs/backlog/P1/B-0170.1-semantic-equivalence-drift-checker.md b/docs/backlog/P1/B-0170.1-semantic-equivalence-drift-checker.md new file mode 100644 index 0000000000..ee043aa63d --- /dev/null +++ b/docs/backlog/P1/B-0170.1-semantic-equivalence-drift-checker.md @@ -0,0 +1,38 @@ +--- +id: B-0170.1 +priority: P1 +status: open +title: "Substrate-claim-checker - semantic-equivalence-drift checker" +created: 2026-05-28 +last_updated: 2026-05-28 +parent: B-0170 +depends_on: [] +classification: buildable-now +decomposition: atomic +owners: [lior] +type: tooling +--- + +# B-0170.1 — Semantic-equivalence-drift checker + +This task implements the "semantic-equivalence-drift" checker, as specified in the parent task B-0170. + +## Scope + +This checker is responsible for detecting claims of semantic equivalence between commands in documentation and verifying them. For example, if a document states that `ll` is an alias for `ls -l`, this checker should be able to validate that claim. + +### V0.1 (This task) + +The initial version of this tool will only *detect* claims of semantic equivalence. It will scan markdown files for patterns like: +- `... is equivalent to ...` +- `... is an alias for ...` +- `... is the same as ...` + +It will then report the file and line number where these claims are made. Verification of the claims is out of scope for this initial version. + +## Acceptance Criteria + +- A new script `tools/substrate-claim-checker/check-semantic-equivalence.ts` is created. +- The script can be run from the command line. +- The script scans markdown files for claims of semantic equivalence. +- The script outputs a list of found claims with their location. diff --git a/tools/substrate-claim-checker/check-semantic-equivalence.ts b/tools/substrate-claim-checker/check-semantic-equivalence.ts new file mode 100644 index 0000000000..d054017588 --- /dev/null +++ b/tools/substrate-claim-checker/check-semantic-equivalence.ts @@ -0,0 +1,84 @@ +#!/usr/bin/env bun +// check-semantic-equivalence.ts -- finds claims of semantic equivalence in markdown files. +// Part of B-0170.1. + +import fs from 'fs'; +import path from 'path'; + +const CLAIM_REGEX = /`([^`]+)`\s+(is equivalent to|is an alias for|is the same as)\s+`([^`]+)`/gi; +const IGNORE_DIRS = ['node_modules', '.git', '.vscode', '.idea', 'dist', 'build']; +const INCLUDE_EXTS = ['.md', '.mdx']; + +interface Match { + file: string; + line: number; + claim: string; +} + +function searchInFile(filePath: string): Match[] { + const matches: Match[] = []; + if (!INCLUDE_EXTS.some(ext => filePath.endsWith(ext))) { + return matches; + } + + try { + const content = fs.readFileSync(filePath, 'utf-8'); + const lines = content.split('\n'); + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + let match; + while ((match = CLAIM_REGEX.exec(line)) !== null) { + matches.push({ + file: filePath, + line: i + 1, + claim: match[0], + }); + } + } + } catch (error) { + // Ignore errors + } + + return matches; +} + +function searchInDirectory(dirPath: string): Match[] { + let allMatches: Match[] = []; + const entries = fs.readdirSync(dirPath, { withFileTypes: true }); + + for (const entry of entries) { + const fullPath = path.join(dirPath, entry.name); + if (IGNORE_DIRS.includes(entry.name)) { + continue; + } + + if (entry.isDirectory()) { + allMatches = allMatches.concat(searchInDirectory(fullPath)); + } else if (entry.isFile()) { + allMatches = allMatches.concat(searchInFile(fullPath)); + } + } + + return allMatches; +} + +function main() { + const searchDir = process.argv[2] || process.cwd(); + console.log(`Searching for semantic equivalence claims in ${searchDir}...\\n`); + + const allMatches = searchInDirectory(searchDir); + + if (allMatches.length === 0) { + console.log('No semantic equivalence claims found.'); + return; + } + + console.log(`Found ${allMatches.length} potential claims:\\n`); + for (const match of allMatches) { + console.log(`- ${match.file}:${match.line}`); + console.log(` > ${match.claim}`); + } +} + +main(); From 183914c25f0d92f43217e9780c6d75ca81d29103 Mon Sep 17 00:00:00 2001 From: "Otto-CLI (Claude)" Date: Thu, 28 May 2026 20:54:40 -0400 Subject: [PATCH 2/3] fix(lint): markdownlint MD032 blank line around list (B-0170.1) Unblocks PR #5881 required lint(markdownlint) check. Additive CI fix on Lior's branch per established co-maintenance pattern. Co-Authored-By: Claude Opus 4.8 --- docs/backlog/P1/B-0170.1-semantic-equivalence-drift-checker.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/backlog/P1/B-0170.1-semantic-equivalence-drift-checker.md b/docs/backlog/P1/B-0170.1-semantic-equivalence-drift-checker.md index ee043aa63d..ecefc847cc 100644 --- a/docs/backlog/P1/B-0170.1-semantic-equivalence-drift-checker.md +++ b/docs/backlog/P1/B-0170.1-semantic-equivalence-drift-checker.md @@ -24,6 +24,7 @@ This checker is responsible for detecting claims of semantic equivalence between ### V0.1 (This task) The initial version of this tool will only *detect* claims of semantic equivalence. It will scan markdown files for patterns like: + - `... is equivalent to ...` - `... is an alias for ...` - `... is the same as ...` From c55f57ccd9af317753f6a4809f366d314c510531 Mon Sep 17 00:00:00 2001 From: "Otto-CLI (Claude)" Date: Thu, 28 May 2026 21:04:08 -0400 Subject: [PATCH 3/3] fix(B-0170.1): address 4 Copilot findings in semantic-equivalence checker - exclude references/upstreams/ from directory walk (IGNORE_DIRS += upstreams) - fix \\n -> \n in template literals (was printing literal backslash-n) - guard main() behind import.meta.main (repo tools/ convention) - log read failures to stderr instead of silent swallow (avoid false negatives) Co-Authored-By: Claude --- .../check-semantic-equivalence.ts | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tools/substrate-claim-checker/check-semantic-equivalence.ts b/tools/substrate-claim-checker/check-semantic-equivalence.ts index d054017588..a82cac73e8 100644 --- a/tools/substrate-claim-checker/check-semantic-equivalence.ts +++ b/tools/substrate-claim-checker/check-semantic-equivalence.ts @@ -6,7 +6,10 @@ import fs from 'fs'; import path from 'path'; const CLAIM_REGEX = /`([^`]+)`\s+(is equivalent to|is an alias for|is the same as)\s+`([^`]+)`/gi; -const IGNORE_DIRS = ['node_modules', '.git', '.vscode', '.idea', 'dist', 'build']; +// `upstreams` excludes references/upstreams/ — 85+ full clones of external +// projects; walking it takes minutes and returns mostly noise (per +// .claude/rules/references-upstreams-not-our-code-search-excludes.md). +const IGNORE_DIRS = ['node_modules', '.git', '.vscode', '.idea', 'dist', 'build', 'upstreams']; const INCLUDE_EXTS = ['.md', '.mdx']; interface Match { @@ -37,7 +40,10 @@ function searchInFile(filePath: string): Match[] { } } } catch (error) { - // Ignore errors + // Surface read failures to stderr so silently-skipped files are visible + // (a swallowed read produces a false negative — "no claims" when the file + // was actually unreadable). + console.error(`warn: could not read ${filePath}: ${(error as Error).message}`); } return matches; @@ -65,7 +71,7 @@ function searchInDirectory(dirPath: string): Match[] { function main() { const searchDir = process.argv[2] || process.cwd(); - console.log(`Searching for semantic equivalence claims in ${searchDir}...\\n`); + console.log(`Searching for semantic equivalence claims in ${searchDir}...\n`); const allMatches = searchInDirectory(searchDir); @@ -74,11 +80,13 @@ function main() { return; } - console.log(`Found ${allMatches.length} potential claims:\\n`); + console.log(`Found ${allMatches.length} potential claims:\n`); for (const match of allMatches) { console.log(`- ${match.file}:${match.line}`); console.log(` > ${match.claim}`); } } -main(); +if (import.meta.main) { + main(); +}