Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
265 changes: 161 additions & 104 deletions packages/checks/src/check-magic-strings.ts
Original file line number Diff line number Diff line change
@@ -1,172 +1,229 @@
#!/usr/bin/env bun
//
// check-magic-strings.ts — flags string literals that appear in 3+ distinct source files.
//
// A string repeated across many files is a candidate for a shared constant, enum value,
// or config entry. Cross-file detection catches what per-file checks miss: a value used
// once per file but scattered across the codebase.
//
// Strings are excluded when they:
// - Contain spaces (Tailwind combos, sentences — not constant candidates)
// - Look like URLs, relative/absolute paths, hex colors, or CSS custom properties
// - Are under 3 or over 80 characters
// - Appear only on import/export/comment lines
// - Live in build artifacts: out/, dist/, build/, .next/, .expo/, node_modules/
Copy link

Copilot AI Apr 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The header comment’s excluded build artifact list doesn’t match EXCLUDED_DIRS below (it now also excludes .turbo/, coverage/, and __generated__/). Updating the comment will keep the script’s documented behavior in sync with what it actually scans.

Suggested change
// - Live in build artifacts: out/, dist/, build/, .next/, .expo/, node_modules/
// - Live in build artifacts: out/, dist/, build/, .next/, .expo/, .turbo/, coverage/, __generated__/, node_modules/

Copilot uses AI. Check for mistakes.
// - Live in test/spec/stories files
//
// Run with --strict to exit 1 on violations (default: advisory, exit 0).

import { readdirSync, readFileSync, statSync } from 'node:fs';
import { join } from 'node:path';

const ROOT = join(import.meta.dir, '..', '..', '..');
const SCAN_ROOTS = ['apps', 'packages'];
const EXCLUDED_DIRS = new Set(['node_modules', 'dist', 'build', '.next', '.expo']);

const EXCLUDED_DIRS = new Set([
'node_modules',
'dist',
'build',
'out',
'.next',
'.expo',
'.turbo',
'coverage',
'__generated__',
]);

const TARGET_EXTENSIONS = new Set(['.ts', '.tsx', '.js', '.jsx', '.mts', '.cts']);
const EXCLUDED_FILE_PATTERNS = [/\.test\./, /\.spec\./, /\.stories\./, /\.d\.ts$/];
const RELATIVE_PATH_PATTERN = /^\.{0,2}\//;
const SLASHED_WORD_PATH_PATTERN = /^[\w.-]+(\/[\w.-]+)+$/;
const WHITESPACE_PATTERN = /\s+/;

const MIN_LITERAL_LENGTH = 4;
const MIN_OCCURRENCES_PER_FILE = 3;
const MIN_LITERAL_LENGTH = 3;
const MAX_LITERAL_LENGTH = 80;
const MIN_FILES = 3;

// These appear everywhere by design and are not worth flagging.
const ALLOWLIST = new Set([
'use client',
'dark',
'light',
'system',
'POST',
// HTTP verbs
'GET',
'POST',
'PUT',
'DELETE',
'PATCH',
'HEAD',
'OPTIONS',
// TypeScript primitive names (appear in Zod schemas, error messages)
'string',
'number',
'boolean',
'object',
'array',
'function',
'null',
'undefined',
// Boolean-string representations
'true',
'false',
// Common config/tooling directory names (appear as string args in build configs)
'node_modules',
'dist',
'build',
'apps',
'packages',
'src',
'out',
'.next',
'.expo',
]);

// Matches single-quoted and double-quoted string literals (including escaped quotes).
const STRING_LITERAL_PATTERN = /(['"])((?:\\.|(?!\1).)+)\1/g;

// Where a single occurrence of a string literal was found inside one file.
interface LiteralLocation {
// 1-based line number of the occurrence (recorded as the zero-based line index + 1).
line: number;
}

// One string literal that is repeated within a single file often enough to be reported
// (at least MIN_OCCURRENCES_PER_FILE occurrences).
interface FileViolation {
// Path of the offending file, relative to ROOT.
file: string;
// Text of the repeated literal, without its surrounding quotes.
literal: string;
// Number of occurrences of the literal in this file.
count: number;
// 1-based line number of each occurrence, in the order they were found.
lines: number[];
}

function isTargetFile(filePath: string): boolean {
const extension = filePath.slice(filePath.lastIndexOf('.'));
if (!TARGET_EXTENSIONS.has(extension)) return false;
return !EXCLUDED_FILE_PATTERNS.some((pattern) => pattern.test(filePath));
// Matches single- and double-quoted string literals (not across newlines).
const STRING_LITERAL = /(['"])((?:\\.|(?!\1)[^\n])+)\1/g;

// Ignore patterns — all hoisted to top level for Biome useTopLevelRegex compliance.
// Each one is tested by shouldIgnoreLiteral against the text between a literal's quotes.
// Zero to two leading dots followed by a slash: "/x", "./x", "../x".
const RE_RELATIVE_PATH = /^\.{0,2}\//;
// Leading slash followed by a word character or dash, e.g. "/api".
// NOTE(review): every string this matches is also matched by RE_RELATIVE_PATH (zero dots + slash).
const RE_ABSOLUTE_PATH = /^\/[\w-]/;
// Whole string is "#" plus 3 to 8 hexadecimal digits.
const RE_HEX_COLOR = /^#[0-9a-fA-F]{3,8}$/;
// Two or more segments of word characters, dots or dashes joined by slashes, e.g. "foo/bar.baz".
const RE_SLASH_PATH = /^[\w.-]+(\/[\w.-]+)+$/;
// Whole string is an unsigned integer or decimal, e.g. "42" or "3.14".
const RE_NUMERIC = /^\d+(\.\d+)?$/;
// CSS utility class pattern (Tailwind): all lowercase+digits connected by dashes.
// Matches flex-1, text-lg, bg-primary, text-muted-foreground, space-y-4, etc.
// Requires at least one dash, so single-word tokens such as "flex" do not match.
const RE_CSS_UTILITY = /^[a-z][a-z0-9]*(-[a-z0-9]+)+$/;
// Starts with digits.digits.digits; there is no end anchor, so any suffix is accepted.
const RE_SEMVER = /^\d+\.\d+\.\d+/;
// CSS dimension values: 1rem, 1.5rem, 100vh, 9999px, 0.5rem, 100%, etc.
const RE_CSS_DIMENSION = /^\d+(\.\d+)?(%|rem|em|px|vh|vw|ch|pt|ex|deg|fr|s|ms)$/;

Comment on lines +87 to +93
Copy link

Copilot AI Apr 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RE_CSS_UTILITY only excludes dashed Tailwind tokens, but many common utilities/CSS keywords are single words (e.g. 'flex'). In this repo, 'flex' appears in multiple files (e.g. apps/*/pages/{404,500}.tsx, packages/web-ui/src/components/*) and will be flagged as a “magic string” despite being a normal Tailwind/CSS value. Consider extending the Tailwind/CSS ignore logic (e.g. allowlist common single-token utilities/keywords or broaden the utility regex).

Copilot uses AI. Check for mistakes.
// Reports whether a path should be scanned: its suffix (everything from the last ".")
// must be one of TARGET_EXTENSIONS and the path must match none of EXCLUDED_FILE_PATTERNS.
function isTargetFile(relPath: string): boolean {
  const lastDot = relPath.lastIndexOf('.');
  const suffix = relPath.slice(lastDot);
  if (!TARGET_EXTENSIONS.has(suffix)) {
    return false;
  }
  // Reject on the first exclusion pattern that matches the path.
  for (const excluded of EXCLUDED_FILE_PATTERNS) {
    if (excluded.test(relPath)) {
      return false;
    }
  }
  return true;
}

// Decides whether a string literal's text is excluded from magic-string detection.
// `value` is the text between the quotes. Returns true as soon as any exclusion rule
// below matches, and false when none does.
// NOTE(review): the ALLOWLIST, '${' and '--' checks each appear twice in this body, and the
// hex-color check follows a broader startsWith('#') check; the later copies can never change
// the result. Presumably this listing interleaves both sides of a diff — confirm against the
// real file before relying on the exact rule set.
function shouldIgnoreLiteral(value: string): boolean {
if (ALLOWLIST.has(value)) return true;
if (value.length < MIN_LITERAL_LENGTH || value.length > MAX_LITERAL_LENGTH) return true;
if (value.includes('${')) return true;
if (ALLOWLIST.has(value)) return true;
if (value.includes(' ')) return true; // multi-word: Tailwind combos, prose, not constants
if (value.includes('${')) return true; // template-literal fragment
if (value.startsWith('http://') || value.startsWith('https://')) return true;
if (RELATIVE_PATH_PATTERN.test(value)) return true;
if (SLASHED_WORD_PATH_PATTERN.test(value)) return true;
// Counts whitespace-separated words; values containing a plain space already returned above.
const words = value.trim().split(WHITESPACE_PATTERN);
if (words.length > 3) return true;
if (value.startsWith('#')) return true;
if (value.startsWith('--')) return true;
if (RE_RELATIVE_PATH.test(value)) return true; // relative paths
if (RE_ABSOLUTE_PATH.test(value)) return true; // absolute paths / routes
if (RE_HEX_COLOR.test(value)) return true; // hex colors
if (value.startsWith('--')) return true; // CSS custom properties
if (RE_SLASH_PATH.test(value)) return true; // slash-separated path-like
if (RE_NUMERIC.test(value)) return true; // numeric strings
if (RE_CSS_UTILITY.test(value)) return true; // Tailwind utility classes
if (value.startsWith('@')) return true; // package import paths
if (value.includes(',')) return true; // comma-separated values (MIME types, accept headers)
if (value.includes('(')) return true; // function-like (CSS functions, e.g. hsl(var(...)))
if (value.includes('*') || value.includes('?')) return true; // glob patterns
if (RE_SEMVER.test(value)) return true; // semver / version strings
if (value.includes(':')) return true; // Tailwind variant syntax (ios:, hover:), URL schemes
if (RE_CSS_DIMENSION.test(value)) return true; // CSS dimension values
return false;
}

function shouldSkipLine(line: string): boolean {
const trimmed = line.trimStart();
return (
trimmed.startsWith('import ') ||
trimmed.startsWith('export ') ||
trimmed.startsWith('//') ||
trimmed.startsWith('*') ||
trimmed.startsWith('export {') ||
trimmed.startsWith('export type {') ||
Comment on lines +127 to +128
Copy link

Copilot AI Apr 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldSkipLine only skips export { ... } / export type { ... }, but the header comment says strings on export lines should be excluded. This will start scanning export const ... = "...", export * from ..., and export default ... lines and likely introduce noisy candidates. Consider skipping all export statements (e.g. a ^\s*export\b check, similar to check-type-casts.ts’s import/export filter).

Suggested change
trimmed.startsWith('export {') ||
trimmed.startsWith('export type {') ||
/^export\b/.test(trimmed) ||

Copilot uses AI. Check for mistakes.
trimmed.startsWith('// ') ||
trimmed.startsWith('//\t') ||
trimmed === '//' ||
trimmed.startsWith('* ') ||
trimmed === '*' ||
Comment on lines +129 to +133
Copy link

Copilot AI Apr 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment skipping is narrower than “comment lines” in the header: lines like //# sourceMappingURL=..., /// <reference ...>, or //TODO: won’t match any of the three checked forms ('// ', '//\t', or a bare '//') and will be scanned. If the intent is to skip all single-line comments, use a broader trimmed.startsWith('//') (and similarly consider a broader trimmed.startsWith('*') for lines inside block comments).

Suggested change
trimmed.startsWith('// ') ||
trimmed.startsWith('//\t') ||
trimmed === '//' ||
trimmed.startsWith('* ') ||
trimmed === '*' ||
trimmed.startsWith('//') ||
trimmed.startsWith('*') ||

Copilot uses AI. Check for mistakes.
trimmed.startsWith('/*')
);
}

function collectFileViolations(file: string): FileViolation[] {
const fullPath = join(ROOT, file);
let content = '';
// Maps literal value → set of distinct relative file paths it appears in.
const literalFiles = new Map<string, Set<string>>();

const allFiles: string[] = [];

function collectFiles(dir: string, relDir: string): void {
let entries: string[];
try {
content = readFileSync(fullPath, 'utf8');
entries = readdirSync(dir);
} catch {
return [];
return;
}

const byLiteral = new Map<string, LiteralLocation[]>();
const lines = content.split('\n');

for (let index = 0; index < lines.length; index++) {
const line = lines[index] ?? '';
if (shouldSkipLine(line)) continue;

const matches = line.matchAll(STRING_LITERAL_PATTERN);
for (const match of matches) {
const value = match[2];
if (!value || shouldIgnoreLiteral(value)) continue;

const current = byLiteral.get(value) ?? [];
current.push({ line: index + 1 });
byLiteral.set(value, current);
for (const entry of entries) {
if (EXCLUDED_DIRS.has(entry)) continue;
const full = join(dir, entry);
const rel = `${relDir}/${entry}`;
let isDir = false;
try {
isDir = statSync(full).isDirectory();
} catch {
continue;
}
if (isDir) {
collectFiles(full, rel);
} else if (isTargetFile(rel)) {
allFiles.push(rel);
}
}

const violations: FileViolation[] = [];
for (const [literal, locations] of byLiteral.entries()) {
if (locations.length < MIN_OCCURRENCES_PER_FILE) continue;
violations.push({
file,
literal,
count: locations.length,
lines: locations.map((location) => location.line),
});
}

return violations.sort((a, b) => b.count - a.count);
}

const targetFiles: string[] = [];

function walkDir(dir: string, relDir: string): void {
let entries: string[] = [];
function scanFile(relPath: string): void {
let content: string;
try {
entries = readdirSync(dir);
content = readFileSync(join(ROOT, relPath), 'utf8');
} catch {
return;
}

for (const entry of entries) {
if (EXCLUDED_DIRS.has(entry)) continue;

const fullPath = join(dir, entry);
const relPath = `${relDir}/${entry}`;
let isDirectory = false;
const seenInFile = new Set<string>(); // one entry per file, regardless of repetition count

try {
isDirectory = statSync(fullPath).isDirectory();
} catch {
continue;
}
for (const line of content.split('\n')) {
if (shouldSkipLine(line)) continue;

if (isDirectory) {
walkDir(fullPath, relPath);
continue;
STRING_LITERAL.lastIndex = 0;
for (;;) {
const match = STRING_LITERAL.exec(line);
if (match === null) break;
const value = match[2];
if (!value || shouldIgnoreLiteral(value)) continue;
if (seenInFile.has(value)) continue;
seenInFile.add(value);
const files = literalFiles.get(value) ?? new Set<string>();
files.add(relPath);
literalFiles.set(value, files);
}

if (isTargetFile(relPath)) targetFiles.push(relPath);
}
}

for (const root of SCAN_ROOTS) {
walkDir(join(ROOT, root), root);
collectFiles(join(ROOT, root), root);
}

const violations = targetFiles.flatMap((file) => collectFileViolations(file));
for (const f of allFiles) {
scanFile(f);
}

const violations = [...literalFiles.entries()]
.filter(([, files]) => files.size >= MIN_FILES)
.sort((a, b) => b[1].size - a[1].size);

if (violations.length === 0) {
console.log('No repeated magic strings found.');
console.log(`✓ No cross-file magic strings found (scanned ${allFiles.length} files).`);
process.exit(0);
}

console.log('Magic string candidates found. Prefer constants/enums in shared config objects:\n');
for (const violation of violations) {
console.log(
`${violation.file}: "${violation.literal}" appears ${violation.count} times (lines: ${violation.lines.join(', ')})`,
);
console.log(
`Magic string candidates (${violations.length}) appearing in ${MIN_FILES}+ distinct files — prefer shared constants/enums:\n`,
);

for (const [literal, files] of violations) {
const sorted = [...files].sort();
console.log(` "${literal}" (${files.size} files)`);
for (const f of sorted) {
console.log(` ${f}`);
}
}

console.log(
'\nTip: centralize repeated literals into frozen constants (Object.freeze) or enum-like objects.',
'\nTip: move repeated literals to a frozen constant or enum in a shared package (e.g. packages/config).',
);

const strictMode = process.argv.includes('--strict');
Expand Down
Loading