Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions docs/backlog/P1/B-0170.1-semantic-equivalence-drift-checker.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
---
id: B-0170.1
priority: P1
status: open
title: "Substrate-claim-checker - semantic-equivalence-drift checker"
created: 2026-05-28
last_updated: 2026-05-28
parent: B-0170
depends_on: []
classification: buildable-now
decomposition: atomic
owners: [lior]
type: tooling
---

# B-0170.1 — Semantic-equivalence-drift checker

This task implements the "semantic-equivalence-drift" checker, as specified in the parent task B-0170.

## Scope

This checker is responsible for detecting claims of semantic equivalence between commands in documentation and verifying them. For example, if a document states that `ll` is an alias for `ls -l`, this checker should be able to validate that claim.

### V0.1 (This task)

The initial version of this tool will only *detect* claims of semantic equivalence. It will scan markdown files for patterns like the following, where `<code>...</code>` stands for a backtick-delimited inline code span:

- `<code>...</code> is equivalent to <code>...</code>`
- `<code>...</code> is an alias for <code>...</code>`
- `<code>...</code> is the same as <code>...</code>`

It will then report the file and line number where these claims are made. Verification of the claims is out of scope for this initial version.

## Acceptance Criteria

- A new script `tools/substrate-claim-checker/check-semantic-equivalence.ts` is created.
- The script can be run from the command line.
- The script scans markdown files for claims of semantic equivalence.
- The script outputs a list of found claims with their location.
92 changes: 92 additions & 0 deletions tools/substrate-claim-checker/check-semantic-equivalence.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/usr/bin/env bun
// check-semantic-equivalence.ts -- finds claims of semantic equivalence in markdown files.
// Part of B-0170.1.

import fs from 'fs';
import path from 'path';

/**
 * Matches an inline-code span, one of the linking phrases ("is equivalent to",
 * "is an alias for", "is the same as"), and a second inline-code span.
 * Global and case-insensitive. Capture groups: 1 = left span contents,
 * 2 = linking phrase, 3 = right span contents.
 */
const CLAIM_REGEX = /`([^`]+)`\s+(is equivalent to|is an alias for|is the same as)\s+`([^`]+)`/gi;

/**
 * Entry names that the directory walk skips entirely.
 *
 * `upstreams` excludes references/upstreams/ — 85+ full clones of external
 * projects; walking it takes minutes and returns mostly noise (per
 * .claude/rules/references-upstreams-not-our-code-search-excludes.md).
 */
const IGNORE_DIRS = [
  'node_modules',
  '.git',
  '.vscode',
  '.idea',
  'dist',
  'build',
  'upstreams',
];

/** Path suffixes of the files that are scanned for claims. */
const INCLUDE_EXTS = [
  '.md',
  '.mdx',
];

/** A single semantic-equivalence claim located in a scanned file. */
interface Match {
  /** Path of the file containing the claim, as passed to the scanner. */
  file: string;
  /** 1-based number of the line on which the claim appears. */
  line: number;
  /** The full matched text: both code spans plus the linking phrase. */
  claim: string;
}

/**
 * Scans one file, line by line, for semantic-equivalence claims.
 *
 * A path that does not end with one of INCLUDE_EXTS is not opened and yields
 * an empty list. A file that cannot be read is reported on stderr and yields
 * whatever was collected before the failure (normally nothing).
 *
 * @param filePath - Path of the file to scan.
 * @returns One entry per claim, in order of appearance, with 1-based line numbers.
 */
function searchInFile(filePath: string): Match[] {
  const hasIncludedExt = INCLUDE_EXTS.some(ext => filePath.endsWith(ext));
  if (!hasIncludedExt) {
    return [];
  }

  const found: Match[] = [];

  try {
    const text = fs.readFileSync(filePath, 'utf-8');

    text.split('\n').forEach((lineText, index) => {
      // A line may hold several claims; record each one separately.
      for (const hit of lineText.matchAll(CLAIM_REGEX)) {
        found.push({ file: filePath, line: index + 1, claim: hit[0] });
      }
    });
  } catch (error) {
    // Report read failures on stderr: a silently skipped file would look like
    // "no claims here" when in fact the file was never inspected.
    console.error(`warn: could not read ${filePath}: ${(error as Error).message}`);
  }

  return found;
}

/**
 * Recursively walks a directory tree and collects every claim found in the
 * files beneath it.
 *
 * Entries whose name appears in IGNORE_DIRS are skipped. Directories are
 * descended into, regular files are handed to searchInFile, and anything else
 * is ignored.
 *
 * A sub-directory that cannot be listed is reported on stderr and skipped so
 * that one unreadable folder does not abort the whole scan. This mirrors how
 * unreadable files are handled.
 *
 * @param dirPath - Directory to start from.
 * @returns All claims found under dirPath, in traversal order.
 * @throws If dirPath itself cannot be listed (missing, not a directory, or
 *   not readable); only failures below the starting directory are tolerated.
 */
function searchInDirectory(dirPath: string): Match[] {
  const collected: Match[] = [];
  const entries = fs.readdirSync(dirPath, { withFileTypes: true });

  for (const entry of entries) {
    if (IGNORE_DIRS.includes(entry.name)) {
      continue;
    }

    const fullPath = path.join(dirPath, entry.name);
    let found: Match[] = [];

    if (entry.isDirectory()) {
      try {
        found = searchInDirectory(fullPath);
      } catch (error) {
        // Only the child's own listing can fail here; failures deeper down
        // are already caught by the corresponding recursive call.
        const reason = error instanceof Error ? error.message : String(error);
        console.error(`warn: could not read ${fullPath}: ${reason}`);
      }
    } else if (entry.isFile()) {
      found = searchInFile(fullPath);
    }

    // Append in place rather than re-allocating the accumulator per entry.
    for (const item of found) {
      collected.push(item);
    }
  }

  return collected;
}

/**
 * CLI entry point: scans the directory given as the first command-line
 * argument (or the current working directory when none is given) and prints
 * each claim found, with its file and line, to stdout.
 */
function main() {
  const rootDir = process.argv[2] || process.cwd();
  console.log(`Searching for semantic equivalence claims in ${rootDir}...\n`);

  const claims = searchInDirectory(rootDir);

  if (claims.length > 0) {
    console.log(`Found ${claims.length} potential claims:\n`);
    claims.forEach(({ file, line, claim }) => {
      console.log(`- ${file}:${line}`);
      console.log(` > ${claim}`);
    });
  } else {
    console.log('No semantic equivalence claims found.');
  }
}

// Run the CLI only when `import.meta.main` is set, so that loading this file
// as a module does not trigger a scan.
if (import.meta.main) {
  main();
}
Loading