From f3f34b78de51a193dd455203f25f3a4fc882b1d5 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sat, 16 May 2026 04:59:27 +0000 Subject: [PATCH 01/14] ci: deterministic check for studio/frontend dep removals Adds a CI gate that catches the common foot-gun: a dep dropped from studio/frontend/package.json that something in src/ still imports. scripts/check_frontend_dep_removal.py Diffs package.json against a git base ref, collects every package no longer declared, and for each one: 1. Greps the entire repo for any usage pattern (static / dynamic / side-effect imports, require, CSS @import, HTML script/link src, new URL(), triple-slash references, template literals, bare quoted strings in JS-like files). 2. Resolves whether the package would still install by BFS'ing the dep graph in the new lockfile starting from the new package.json's declared deps (so a stale lockfile does not give false OK-via-transitive results). 3. Distinguishes top-level node_modules/ from nested copies under other packages. Bare src/ imports only resolve to the top-level path. 4. Pip-installed playwright references are filtered, so removing the npm playwright (CI uses the pip one) is reported correctly. Additional hygiene checks (warnings, fail with --strict): - lockfile dep map matches package.json (catches drift). - @types/X is not orphaned when X is no longer declared. - No src/ import points at a package not declared in any field. tests/studio/test_frontend_dep_removal.py 24 deterministic cases. Each patches a copy of the head package.json, runs the script, and asserts (exit status, reported FAIL list). Covers: - Genuinely-breaking removals: next-themes, @xyflow/react, @huggingface/hub, dexie, motion, canvas-confetti, recharts, node-forge, mammoth, unpdf. - Safe-via-transitive removals: katex, clsx, react, @radix-ui/react-slot, zustand, tailwind-merge, remark-gfm, date-fns, js-yaml, @tauri-apps/api. - Mixed multi-removal failing on the unsafe entries only. - Non-existent / not-in-base names (no-op). - Move from deps to devDeps (not a removal). .github/workflows/studio-frontend-ci.yml Runs the checker on pull_request events against origin/${{ github.base_ref }}, plus the edge-case suite. --- .github/workflows/studio-frontend-ci.yml | 12 + scripts/check_frontend_dep_removal.py | 471 ++++++++++++++++++++++ tests/studio/test_frontend_dep_removal.py | 187 +++++++++ 3 files changed, 670 insertions(+) create mode 100755 scripts/check_frontend_dep_removal.py create mode 100644 tests/studio/test_frontend_dep_removal.py diff --git a/.github/workflows/studio-frontend-ci.yml b/.github/workflows/studio-frontend-ci.yml index 3632125ca2..546198b8b2 100644 --- a/.github/workflows/studio-frontend-ci.yml +++ b/.github/workflows/studio-frontend-ci.yml @@ -84,6 +84,18 @@ jobs: exit 1 fi + # Catch the common foot-gun: a dep dropped from package.json that is + # still imported somewhere. The script walks the lockfile dep graph + # from the new top-level deps and only counts top-level node_modules + # paths as valid resolution targets for bare src/ imports. + - name: Dependency removal safety check + if: github.event_name == 'pull_request' + working-directory: ${{ github.workspace }} + run: | + python3 scripts/check_frontend_dep_removal.py \ + --base "origin/${{ github.base_ref }}" + python3 tests/studio/test_frontend_dep_removal.py + - name: Typecheck run: npm run typecheck diff --git a/scripts/check_frontend_dep_removal.py b/scripts/check_frontend_dep_removal.py new file mode 100755 index 0000000000..e07dde306e --- /dev/null +++ b/scripts/check_frontend_dep_removal.py @@ -0,0 +1,471 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: AGPL-3.0-only +# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. +"""Guard against breaking npm dependency removals in studio/frontend. + +Diffs the current package.json against a git base, finds every package +that was removed, and confirms each is no longer referenced anywhere +in the repo. If a removed package is still imported and is not +transitively resolvable through the new lockfile, exits non-zero with +file:line citations. + +Usage: + python scripts/check_frontend_dep_removal.py + python scripts/check_frontend_dep_removal.py --base origin/main + python scripts/check_frontend_dep_removal.py --base HEAD~1 + python scripts/check_frontend_dep_removal.py --base-pkg PATH --base-lock PATH + +Exit codes: + 0 every removed dep is safe (no source refs or still resolvable) + 1 at least one removed dep is referenced and not resolvable + 2 invocation error (bad args, missing file, git error) +""" +from __future__ import annotations + +import argparse +import json +import os +import re +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parent.parent +FRONTEND_PKG = "studio/frontend/package.json" +FRONTEND_LOCK = "studio/frontend/package-lock.json" + +DEP_FIELDS = ( + "dependencies", + "devDependencies", + "peerDependencies", + "optionalDependencies", +) + +# Sources where seeing a package name does NOT count as usage. +EXPECTED_NOISE_FILES = { + "studio/frontend/package.json", + "studio/frontend/package-lock.json", + "studio/backend/core/data_recipe/oxc-validator/package.json", + "studio/backend/core/data_recipe/oxc-validator/package-lock.json", +} + +# Only quoted-string occurrences in these file types can be module specifiers. +JS_LIKE_EXT = re.compile( + r"\.(ts|tsx|js|jsx|mjs|cjs|html|htm|css|scss|sass|json|jsonc)$" +) + +GREP_INCLUDES = [ + "--include=*.ts", "--include=*.tsx", "--include=*.js", "--include=*.jsx", + "--include=*.mjs", "--include=*.cjs", + "--include=*.html", "--include=*.htm", + "--include=*.css", "--include=*.scss", "--include=*.sass", + "--include=*.json", "--include=*.jsonc", + "--include=*.md", "--include=*.mdx", + "--include=*.py", "--include=*.rs", "--include=*.toml", + "--include=*.yml", "--include=*.yaml", + "--include=*.sh", "--include=*.ps1", "--include=*.bat", + "--include=Dockerfile*", +] +GREP_EXCLUDES = [ + "--exclude-dir=node_modules", + "--exclude-dir=dist", + "--exclude-dir=.git", + "--exclude-dir=__pycache__", + "--exclude-dir=target", + "--exclude-dir=.next", + "--exclude-dir=build", + "--exclude-dir=.venv", + "--exclude-dir=venv", +] + +# A pip-installed playwright reference is the PyPI package, not npm. +PIP_PLAYWRIGHT = re.compile( + r"(pip\s+install\s+['\"]?playwright" + r"|python\s+-m\s+playwright" + r"|from\s+playwright" + r"|^\s*import\s+playwright)" +) + + +@dataclass +class Hit: + file: str + line: int + kind: str + snippet: str + + +def run(cmd: list[str], cwd: Path | None = None) -> str: + """Run a command, return stdout. On non-zero exit, return ''.""" + res = subprocess.run( + cmd, cwd=cwd or REPO_ROOT, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, text=True, + ) + return res.stdout if res.returncode == 0 else "" + + +def read_pkg_at(base: str, path: str) -> dict: + """Read JSON at `base:path` via git show. Empty dict if missing.""" + out = run(["git", "show", f"{base}:{path}"]) + if not out.strip(): + return {} + return json.loads(out) + + +def read_pkg_file(path: Path) -> dict: + if not path.exists(): + return {} + return json.loads(path.read_text()) + + +def all_decl_names(pkg: dict) -> set[str]: + names: set[str] = set() + for field in DEP_FIELDS: + names.update((pkg.get(field) or {}).keys()) + return names + + +def lock_resolvable(lock: dict, name: str) -> list[str]: + """Return lockfile paths that still install `name` (transitive ok). + Naive: any path matching the name. May give false positives on a stale + lockfile. Prefer `reachable_from_head` for sync-aware analysis. + """ + pkgs = lock.get("packages", {}) + hits = [] + for path in pkgs: + if path == f"node_modules/{name}" or path.endswith(f"/node_modules/{name}"): + hits.append(path) + return hits + + +def _resolve_install_path(parent_path: str, name: str, pkgs: dict) -> str | None: + """Walk up the nested node_modules chain from `parent_path` to find + where `name` actually resolves. Mirrors Node module resolution. + """ + parts = parent_path.split("/node_modules/") + for i in range(len(parts), 0, -1): + prefix = "/node_modules/".join(parts[:i]) + trial = (prefix + "/node_modules/" if prefix else "node_modules/") + name + if trial in pkgs: + return trial + if f"node_modules/{name}" in pkgs: + return f"node_modules/{name}" + return None + + +def _deps_of(meta: dict) -> dict: + out = {} + for field in ("dependencies", "peerDependencies", "optionalDependencies"): + out.update(meta.get(field) or {}) + return out + + +def reachable_from_head(head_pkg: dict, lock: dict) -> set[str]: + """BFS the lockfile dep graph starting from `head_pkg`'s top-level + declared deps. Returns the set of lockfile install paths that survive. + Stale lockfile entries (orphaned by the new package.json) are excluded. + """ + pkgs = lock.get("packages", {}) + if not pkgs: + return set() + roots = all_decl_names(head_pkg) + seen: set[str] = set() + frontier: list[str] = [] + for name in roots: + p = _resolve_install_path("", name, pkgs) + if p: + frontier.append(p) + while frontier: + path = frontier.pop() + if path in seen: + continue + seen.add(path) + meta = pkgs.get(path, {}) + for dep_name in _deps_of(meta): + p = _resolve_install_path(path, dep_name, pkgs) + if p and p not in seen: + frontier.append(p) + return seen + + +def classify(pkg: str, file: str, content: str) -> str | None: + """Return why `content` references `pkg`, or None.""" + esc = re.escape(pkg) + if re.search(rf"import\s+(?:[^'\";]*?\s+from\s+)?['\"]{esc}(?:/[^'\"]*)?['\"]", content): + return "static_import" + if re.search(rf"import\s+['\"]{esc}(?:/[^'\"]*)?['\"]", content): + return "side_effect_import" + if re.search(rf"import\(\s*['\"]{esc}(?:/[^'\"]*)?['\"]\s*\)", content): + return "dynamic_import" + if re.search(rf"require(?:\.resolve)?\(\s*['\"]{esc}(?:/[^'\"]*)?['\"]\s*\)", content): + return "require" + if re.search(rf"@import\s+['\"]{esc}", content): + return "css_import" + if re.search(rf"]*src\s*=\s*['\"][^'\"]*/{esc}", content): + return "html_script" + if re.search(rf"]*href\s*=\s*['\"][^'\"]*/{esc}", content): + return "html_link" + if re.search(rf"///\s* list[str]: + """Return a list of warnings if package-lock.json's dep map + disagrees with package.json (i.e., npm install was not re-run). + """ + warnings = [] + if not head_lock: + return warnings + root = head_lock.get("packages", {}).get("", {}) + lock_decl = {**(root.get("dependencies") or {}), + **(root.get("devDependencies") or {}), + **(root.get("peerDependencies") or {}), + **(root.get("optionalDependencies") or {})} + pkg_decl = {} + for f in DEP_FIELDS: + pkg_decl.update(head_pkg.get(f) or {}) + only_in_lock = set(lock_decl) - set(pkg_decl) + only_in_pkg = set(pkg_decl) - set(lock_decl) + if only_in_lock: + warnings.append(f"lockfile lists deps not in package.json (lockfile stale): {sorted(only_in_lock)}") + if only_in_pkg: + warnings.append(f"package.json declares deps not in lockfile (run npm install): {sorted(only_in_pkg)}") + return warnings + + +def types_orphan_warnings(head_pkg: dict) -> list[str]: + """Flag @types/ deps where is no longer declared anywhere + in package.json. Removing X without also dropping @types/X leaves + dangling type packages. + """ + decl = set() + for f in DEP_FIELDS: + decl.update((head_pkg.get(f) or {}).keys()) + warnings = [] + for name in decl: + if not name.startswith("@types/"): + continue + # @types/foo provides types for `foo` + # @types/foo-bar provides types for `foo-bar` + # @types/scope__pkg provides types for `@scope/pkg` + target = name[len("@types/"):] + if "__" in target: + scope, sub = target.split("__", 1) + target = f"@{scope}/{sub}" + if target == "node": + continue # Node.js types are always implicit + if target not in decl: + warnings.append(f"@types/{target.replace('@', '').replace('/', '__')} present but '{target}' is not declared") + return warnings + + +def find_imports_without_decl(head_pkg: dict) -> list[tuple[str, int, str]]: + """Reverse check: find bare-specifier imports in studio/frontend/src + that don't correspond to any declared package.json dep. Catches the + case where someone adds an import but forgets the dep declaration. + Returns (file, line, spec) tuples. + """ + decl = set() + for f in DEP_FIELDS: + decl.update((head_pkg.get(f) or {}).keys()) + # Also: anything tsconfig path-aliases (just '@/...' here) is internal + pattern = r"(?:from|import)\s+['\"]([^'\"./][^'\"]*)['\"]" + args = ["grep", "-rnE", pattern, + "--include=*.ts", "--include=*.tsx", "--include=*.js", "--include=*.jsx", + "studio/frontend/src"] + out = run(args) + missing = [] + for line in out.splitlines(): + m = re.match(r"^(?:\./)?([^:]+):(\d+):(.*)$", line) + if not m: + continue + file, ln, content = m.group(1), int(m.group(2)), m.group(3) + for spec_match in re.finditer(pattern, content): + spec = spec_match.group(1) + # Resolve to package name (strip subpath) + if spec.startswith("@"): + parts = spec.split("/", 2) + pkg_name = "/".join(parts[:2]) if len(parts) >= 2 else spec + else: + pkg_name = spec.split("/", 1)[0] + if pkg_name in decl: + continue + # Internal aliases like '@/foo' or starts with builtin names + if pkg_name == "@": + continue + if pkg_name in {"node:fs", "node:path", "fs", "path", "url", "stream", + "crypto", "buffer", "util", "events", "child_process"}: + continue + missing.append((file, ln, spec)) + return missing + + +def grep_repo(pat: str) -> list[tuple[str, int, str]]: + args = ["grep", "-rnE", pat] + GREP_INCLUDES + GREP_EXCLUDES + ["."] + out = run(args) + rows = [] + for line in out.splitlines(): + m = re.match(r"^(\./)?([^:]+):(\d+):(.*)$", line) + if m: + rows.append((m.group(2), int(m.group(3)), m.group(4))) + return rows + + +def find_usage(pkg: str) -> list[Hit]: + """Return real usages of `pkg`. Filters pip-playwright separately.""" + rows = grep_repo(re.escape(pkg)) + hits = [] + for file, lineno, content in rows: + if pkg == "playwright" and PIP_PLAYWRIGHT.search(content): + continue + kind = classify(pkg, file, content) + if kind: + hits.append(Hit(file, lineno, kind, content[:160])) + return hits + + +def main() -> int: + p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawTextHelpFormatter) + p.add_argument("--base", default="origin/main", + help="git ref to diff against (default: origin/main). " + "Examples: HEAD~1, main, a-tag, a-sha.") + p.add_argument("--base-pkg", help="optional override: read base package.json from this path") + p.add_argument("--base-lock", help="optional override: read base lockfile from this path") + p.add_argument("--head-pkg", default=str(REPO_ROOT / FRONTEND_PKG), + help="head package.json path (default: working tree)") + p.add_argument("--head-lock", default=str(REPO_ROOT / FRONTEND_LOCK), + help="head lockfile path (default: working tree)") + p.add_argument("--verbose", action="store_true") + p.add_argument("--strict", action="store_true", + help="Also fail on hygiene warnings (lockfile sync, " + "@types orphans, imports without declared dep).") + args = p.parse_args() + + if args.base_pkg: + base_pkg = read_pkg_file(Path(args.base_pkg)) + else: + base_pkg = read_pkg_at(args.base, FRONTEND_PKG) + head_pkg = read_pkg_file(Path(args.head_pkg)) + if not base_pkg: + print(f"ERROR: could not read base package.json at {args.base}:{FRONTEND_PKG}", file=sys.stderr) + return 2 + if not head_pkg: + print(f"ERROR: could not read head package.json at {args.head_pkg}", file=sys.stderr) + return 2 + + if args.base_lock: + head_lock = read_pkg_file(Path(args.head_lock)) + else: + head_lock = read_pkg_file(Path(args.head_lock)) + + base_names = all_decl_names(base_pkg) + head_names = all_decl_names(head_pkg) + removed = sorted(base_names - head_names) + + if not removed: + print("[OK] no dependencies removed from studio/frontend/package.json") + return 0 + + print(f"Checking {len(removed)} removed package(s) from studio/frontend/package.json") + print(f"Base: {args.base} Head: working tree") + print() + + reachable_paths = reachable_from_head(head_pkg, head_lock) if head_lock else set() + + def reachable_install_paths(name: str) -> tuple[str | None, list[str]]: + """Return (top_level_path, nested_paths). top_level is what bare + `import "name"` from src/ actually resolves to; nested copies are + only visible inside the parent package that nested them. + """ + top = f"node_modules/{name}" + top_path = top if top in reachable_paths else None + nested = sorted( + p for p in reachable_paths + if p != top and p.endswith(f"/node_modules/{name}") + ) + return top_path, nested + + failures: list[tuple[str, list[Hit]]] = [] + for name in removed: + hits = find_usage(name) + top, nested = reachable_install_paths(name) + importable_top_level = top is not None + # Source imports of bare specifier `name` resolve ONLY to top-level + # node_modules/. Nested copies under another package are + # invisible to src/ files. + if hits and not importable_top_level: + status = "FAIL" + elif hits and importable_top_level: + status = "OK-via-transitive" + else: + status = "OK" + print(f" [{status}] {name}") + if top: + print(f" reachable (top-level): {top}") + if nested: + print(f" reachable (nested, NOT importable from src/): {nested[0]}" + + (f" (+{len(nested)-1} more)" if len(nested) > 1 else "")) + if hits: + for h in hits[:5]: + print(f" [{h.kind}] {h.file}:{h.line} {h.snippet}") + if status == "FAIL": + failures.append((name, hits)) + if args.verbose and not hits and not (top or nested): + print(" no references, not reachable -- clean removal") + + print() + + # Additional hygiene checks (warnings unless --strict). + sync_warns = lockfile_root_sync(head_pkg, head_lock) + if sync_warns: + print("Lockfile sync warnings:") + for w in sync_warns: + print(f" - {w}") + print() + types_warns = types_orphan_warnings(head_pkg) + if types_warns: + print("@types orphan warnings:") + for w in types_warns: + print(f" - {w}") + print() + missing_imports = find_imports_without_decl(head_pkg) + if missing_imports: + print(f"Imports without a matching package.json dep ({len(missing_imports)}):") + for file, ln, spec in missing_imports[:20]: + print(f" - {file}:{ln} imports '{spec}'") + print() + + strict_failures = bool(failures) or ( + args.strict and (sync_warns or types_warns or missing_imports) + ) + + if failures: + print(f"FAIL: {len(failures)} removed package(s) still referenced and not resolvable") + for name, _ in failures: + print(f" - {name}") + return 1 + if strict_failures: + print("FAIL (--strict): one or more hygiene warnings present") + return 1 + + print("PASS: all removed packages are safe to drop") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/studio/test_frontend_dep_removal.py b/tests/studio/test_frontend_dep_removal.py new file mode 100644 index 0000000000..93851b0fde --- /dev/null +++ b/tests/studio/test_frontend_dep_removal.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: AGPL-3.0-only +# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. +"""Edge-case suite for scripts/check_frontend_dep_removal.py. + +Each case patches a copy of studio/frontend/package.json to remove (or +move) a specific dependency, invokes the checker against the real +working tree's lockfile, and asserts the verdict matches expectations. + +Run: + python tests/studio/test_frontend_dep_removal.py + +Exits 0 iff every case behaves as expected. +""" +from __future__ import annotations + +import json +import os +import subprocess +import sys +import tempfile +from dataclasses import dataclass +from pathlib import Path + +REPO = Path(__file__).resolve().parents[2] +HEAD_PKG = REPO / "studio/frontend/package.json" +HEAD_LOCK = REPO / "studio/frontend/package-lock.json" +SCRIPT = REPO / "scripts/check_frontend_dep_removal.py" + + +@dataclass +class Case: + id: str + desc: str + remove: list[str] + expected_status: str # "PASS" | "FAIL" + expected_failures: list[str] + move_to_dev: list[str] | None = None # rare: deps moved, not removed + + +CASES: list[Case] = [ + Case("C1", "removing next-themes breaks 2 src imports", + ["next-themes"], "FAIL", ["next-themes"]), + Case("C2", "removing @xyflow/react breaks recipe-studio src imports " + "(no other declared dep pulls @xyflow/react)", + ["@xyflow/react"], "FAIL", ["@xyflow/react"]), + Case("C3", "removing katex is safe: streamdown/math, mermaid, " + "rehype-katex all keep it at top level", + ["katex"], "PASS", []), + Case("C4", "removing clsx is safe: streamdown keeps it", + ["clsx"], "PASS", []), + Case("C5", "removing react is safe: peer of countless packages", + ["react"], "PASS", []), + Case("C6", "removing @radix-ui/react-slot is safe: pulled by " + "radix-ui umbrella + @assistant-ui/react", + ["@radix-ui/react-slot"], "PASS", []), + Case("C7", "removing zustand is safe: @assistant-ui/react keeps " + "top-level zustand@5.x (nested xyflow 4.x is irrelevant " + "to src imports)", + ["zustand"], "PASS", []), + Case("C8", "multi-remove with mixed safety: next-themes + " + "@huggingface/hub + dexie all unsafe", + ["next-themes", "@huggingface/hub", "dexie"], "FAIL", + ["next-themes", "@huggingface/hub", "dexie"]), + Case("C9", "removing @huggingface/hub breaks 5+ src imports", + ["@huggingface/hub"], "FAIL", ["@huggingface/hub"]), + Case("C10", "removing tailwind-merge is safe: streamdown keeps it", + ["tailwind-merge"], "PASS", []), + Case("C11", "removing a non-existent name is a no-op", + ["__never_existed_in_pkg__"], "PASS", []), + Case("C12", "moving @hugeicons/react from deps to devDeps is NOT a " + "removal (still declared)", + [], "PASS", [], move_to_dev=["@hugeicons/react"]), + Case("C13", "removing @huggingface/hub AND @xyflow/react together: both " + "are root-only deps with no other parents, so both should FAIL", + ["@huggingface/hub", "@xyflow/react"], "FAIL", + ["@huggingface/hub", "@xyflow/react"]), + Case("C14", "removing dexie breaks src imports (no other declared " + "dep needs it)", + ["dexie"], "FAIL", ["dexie"]), + Case("C15", "removing motion (used in 20+ src imports including " + "framer-motion-style animations); no transitive parent", + ["motion"], "FAIL", ["motion"]), + Case("C16", "removing canvas-confetti (imported in confetti.tsx); " + "no transitive parent", + ["canvas-confetti"], "FAIL", ["canvas-confetti"]), + Case("C17", "removing recharts (imported in chart.tsx); no transitive " + "parent", + ["recharts"], "FAIL", ["recharts"]), + Case("C18", "removing js-yaml is safe: @eslint/eslintrc keeps it " + "(triggers @types/js-yaml orphan warning, non-fatal)", + ["js-yaml"], "PASS", []), + Case("C19", "removing node-forge (imported in providers-api.ts); " + "no transitive parent", + ["node-forge"], "FAIL", ["node-forge"]), + Case("C20", "removing @tauri-apps/api is safe: all 5 @tauri-apps " + "plugins declare it as a direct dep", + ["@tauri-apps/api"], "PASS", []), + Case("C21", "removing mammoth (imported in runtime-provider.tsx); " + "no transitive parent", + ["mammoth"], "FAIL", ["mammoth"]), + Case("C22", "removing unpdf (imported in runtime-provider.tsx); " + "no transitive parent", + ["unpdf"], "FAIL", ["unpdf"]), + Case("C23", "removing remark-gfm is safe: streamdown declares it " + "as a direct dep", + ["remark-gfm"], "PASS", []), + Case("C24", "removing date-fns is safe: react-day-picker and " + "@base-ui/react both declare it as a direct dep", + ["date-fns"], "PASS", []), +] + + +def synth_head(head_pkg: dict, case: Case) -> dict: + out = json.loads(json.dumps(head_pkg)) + for name in case.remove: + for field in ("dependencies", "devDependencies", + "peerDependencies", "optionalDependencies"): + (out.get(field) or {}).pop(name, None) + if case.move_to_dev: + for name in case.move_to_dev: + v = (out.get("dependencies") or {}).pop(name, None) + if v is not None: + out.setdefault("devDependencies", {})[name] = v + return out + + +def run_case(case: Case, head_pkg: dict) -> tuple[bool, str]: + synth = synth_head(head_pkg, case) + with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f: + json.dump(synth, f, indent=2) + synth_path = f.name + try: + proc = subprocess.run( + [sys.executable, str(SCRIPT), + "--base-pkg", str(HEAD_PKG), + "--head-pkg", synth_path, + "--head-lock", str(HEAD_LOCK)], + capture_output=True, text=True, + ) + finally: + os.unlink(synth_path) + + actual_status = {0: "PASS", 1: "FAIL"}.get(proc.returncode, f"RC{proc.returncode}") + failure_pkgs: list[str] = [] + in_summary = False + for line in proc.stdout.splitlines(): + if "FAIL:" in line and "removed package" in line: + in_summary = True + continue + if in_summary and line.strip().startswith("- "): + failure_pkgs.append(line.strip()[2:]) + + ok = ( + actual_status == case.expected_status + and set(failure_pkgs) == set(case.expected_failures) + ) + return ok, ( + f"expected: status={case.expected_status} fails={sorted(case.expected_failures)}\n" + f"actual: status={actual_status} fails={sorted(failure_pkgs)}\n" + f"--- stdout (first 30 lines) ---\n" + + "\n".join(proc.stdout.splitlines()[:30]) + ) + + +def main() -> int: + head_pkg = json.loads(HEAD_PKG.read_text()) + print(f"Running {len(CASES)} edge cases against {SCRIPT.relative_to(REPO)}") + print() + results: list[tuple[Case, bool, str]] = [] + for c in CASES: + ok, detail = run_case(c, head_pkg) + results.append((c, ok, detail)) + mark = "PASS" if ok else "FAIL" + print(f" [{mark}] {c.id}: {c.desc}") + if not ok: + for line in detail.splitlines(): + print(f" {line}") + print() + passed = sum(1 for _, ok, _ in results if ok) + total = len(results) + print(f"{passed}/{total} edge cases pass") + return 0 if passed == total else 1 + + +if __name__ == "__main__": + raise SystemExit(main()) From 667cee60dc334173f3097fed3ff5c7f3bc9398a2 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sat, 16 May 2026 05:05:26 +0000 Subject: [PATCH 02/14] scripts: harden frontend dep removal check + adversarial suite classify() now catches sneaky shapes that an earlier line-only scan would miss: - multi-line `import { a, b } from "pkg"` and the same shape for `export { ... } from "pkg"` / `export * from "pkg"` / `export type ... from "pkg"`. - JSDoc `@import("pkg")` references. - Word-boundary fix so `foo` no longer matches `foobar` (subpath gate: after the package name we require closing quote or `/`). - Negative-lookbehind on `(? set[str]: def classify(pkg: str, file: str, content: str) -> str | None: - """Return why `content` references `pkg`, or None.""" + """Return why `content` references `pkg`, or None. + + `content` may span multiple lines (for multi-line imports/exports); + each pattern uses re.DOTALL where it matters. The bare-spec + regexes use a word-boundary check on the package name so that + `foobar` does not match `foo`. + """ esc = re.escape(pkg) - if re.search(rf"import\s+(?:[^'\";]*?\s+from\s+)?['\"]{esc}(?:/[^'\"]*)?['\"]", content): + # Subpath gate: after the package name, the next char must be either + # the closing quote, `/`, or end-of-string. Prevents foo matching foobar. + sub = r"(?:/[^'\"`]*)?" + + flags_dotall = re.DOTALL | re.MULTILINE + + # CSS @import is checked first so it does not collide with the + # side-effect-import regex below. + if re.search(rf"@import\s+['\"]{esc}{sub}['\"]", content): + return "css_import" + # Static imports: handle multi-line `import { ... } from "pkg"` by + # allowing arbitrary content (newlines included) between `import` + # and `from`. The non-greedy match plus the required `from` keeps + # this scoped to a single statement. + if re.search(rf"(?]*src\s*=\s*['\"][^'\"]*/{esc}", content): return "html_script" if re.search(rf"]*href\s*=\s*['\"][^'\"]*/{esc}", content): return "html_link" - if re.search(rf"///\s* list[tuple[str, int, str]]: return rows +_file_lines_cache: dict[str, list[str]] = {} + + +def _read_file(path: str) -> list[str]: + if path not in _file_lines_cache: + try: + _file_lines_cache[path] = Path(path).read_text(errors="replace").splitlines() + except (OSError, UnicodeDecodeError): + _file_lines_cache[path] = [] + return _file_lines_cache[path] + + def find_usage(pkg: str) -> list[Hit]: - """Return real usages of `pkg`. Filters pip-playwright separately.""" + """Return real usages of `pkg`. Filters pip-playwright separately. + + For each filename returned by grep, also feed a multi-line window + around the matching line into classify() so multi-line imports + (`import {\n a\n} from "pkg"`) get picked up. + """ rows = grep_repo(re.escape(pkg)) hits = [] + seen_keys: set[tuple[str, str]] = set() for file, lineno, content in rows: if pkg == "playwright" and PIP_PLAYWRIGHT.search(content): continue + # Try the single-line classify first. kind = classify(pkg, file, content) + if not kind: + # Multi-line window: 4 lines above + the line itself + 4 below. + lines = _read_file(file) + lo = max(0, lineno - 5) + hi = min(len(lines), lineno + 4) + window = "\n".join(lines[lo:hi]) + kind = classify(pkg, file, window) if kind: + key = (file, kind) + if key in seen_keys: + continue + seen_keys.add(key) hits.append(Hit(file, lineno, kind, content[:160])) return hits diff --git a/tests/studio/test_frontend_dep_removal.py b/tests/studio/test_frontend_dep_removal.py index 93851b0fde..549223aac6 100644 --- a/tests/studio/test_frontend_dep_removal.py +++ b/tests/studio/test_frontend_dep_removal.py @@ -163,6 +163,325 @@ def run_case(case: Case, head_pkg: dict) -> tuple[bool, str]: ) +# --------------------------------------------------------------------------- +# Classifier unit tests: feed hand-crafted snippets directly into classify() +# and assert the returned kind. Covers sneaky import shapes that an +# adversarial / careless dev might use to obscure a real usage. +# --------------------------------------------------------------------------- + +# Import the script's classify() by file path so this test does not need +# the package to be installed. +import importlib.util as _ilu +_spec = _ilu.spec_from_file_location("_dep_check", str(SCRIPT)) +_dep_check = _ilu.module_from_spec(_spec) +sys.modules["_dep_check"] = _dep_check # required so @dataclass can resolve annotations +_spec.loader.exec_module(_dep_check) +classify = _dep_check.classify + + +@dataclass +class ClassifyCase: + id: str + desc: str + pkg: str + file: str + content: str + expected_kind: str | None # None means "no detection" + + +CLASSIFY_CASES: list[ClassifyCase] = [ + # Bog-standard shapes + ClassifyCase("U01", "single-line static import", + "next-themes", "src/x.tsx", + 'import { ThemeProvider } from "next-themes";', + "static_import"), + ClassifyCase("U02", "side-effect import", + "katex", "src/x.tsx", + 'import "katex/dist/katex.min.css";', + "side_effect_import"), + ClassifyCase("U03", "dynamic import", + "@tauri-apps/api", "src/x.tsx", + 'const { x } = await import("@tauri-apps/api/window");', + "dynamic_import"), + ClassifyCase("U04", "require()", + "lodash", "src/x.js", + 'const _ = require("lodash");', + "require"), + ClassifyCase("U05", "CSS @import", + "tailwindcss", "src/x.css", + '@import "tailwindcss";', + "css_import"), + # Sneaky shapes + ClassifyCase("U06", "multi-line static import", + "next-themes", "src/x.tsx", + 'import {\n ThemeProvider,\n useTheme,\n} from "next-themes";', + "static_import"), + ClassifyCase("U07", "import type", + "@huggingface/hub", "src/x.ts", + 'import type { PipelineType } from "@huggingface/hub";', + "static_import"), + ClassifyCase("U08", "export * from re-export", + "@some-org/secrets", "src/x.ts", + 'export * from "@some-org/secrets";', + "re_export"), + ClassifyCase("U09", "export { x } from re-export", + "lodash-es", "src/x.ts", + 'export { foo, bar } from "lodash-es";', + "re_export"), + ClassifyCase("U10", "export type ... from re-export", + "@huggingface/hub", "src/x.ts", + 'export type { Foo } from "@huggingface/hub";', + "re_export"), + ClassifyCase("U11", "multi-line export from re-export", + "@some/pkg", "src/x.ts", + 'export {\n thing,\n other,\n} from "@some/pkg";', + "re_export"), + ClassifyCase("U12", "JSDoc @import", + "react", "src/x.ts", + '/** @type {import("react").FC} */\nconst Foo = () => null;', + "dynamic_import"), + ClassifyCase("U13", "template literal package path", + "@assistant-ui/react", "src/x.tsx", + 'const url = `@assistant-ui/react`;', + "template_literal"), + ClassifyCase("U14", "new URL import-meta", + "monaco-editor", "src/x.ts", + 'new URL("monaco-editor/esm/vs/editor/editor.worker", import.meta.url);', + "new_url"), + ClassifyCase("U15", "tsc triple-slash type ref", + "@types/some-pkg", "src/x.ts", + '/// ', + "tsc_triple_slash"), + ClassifyCase("U16", "HTML script src", + "alpinejs", "index.html", + '', + "html_script"), + ClassifyCase("U17", "HTML link href", + "alpinejs", "index.html", + '', + "html_link"), + ClassifyCase("U18", "bare quoted string in tsconfig paths", + "@huggingface/hub", "tsconfig.json", + '"paths": { "hf": ["@huggingface/hub/*"] }', + "string_literal"), + ClassifyCase("U19", "vite alias key", + "@dagrejs/dagre", "vite.config.ts", + '"@dagrejs/dagre": path.resolve(__dirname, "./..."),', + "string_literal"), + # False-positive guards (these should NOT detect) + ClassifyCase("U20", "different package with shared prefix", + "foo", "src/x.ts", + 'import { x } from "foobar";', + None), + ClassifyCase("U21", "package mentioned in plain comment text", + "react", "src/x.ts", + '// We migrated from react-router to tanstack-router', + None), + ClassifyCase("U22", "package name as a URL path tail is NOT detected " + "(boundary rule: pkg must be followed by quote or `/`)", + "react", "src/x.ts", + 'const docs = "https://example.com/react";', + None), + ClassifyCase("U23", "package name in Python file (ignored, " + "Python can never import npm packages)", + "playwright", "tests/x.py", + 'label: str = "playwright"', + None), + ClassifyCase("U24", "exact-prefix collision: pkg 'lodash' and 'lodash-es'", + "lodash", "src/x.ts", + 'import _ from "lodash-es";', + None), + ClassifyCase("U25", "scoped pkg substring collision", + "@radix-ui/react-label", "src/x.ts", + 'import x from "@radix-ui/react-label-extra";', + None), + ClassifyCase("U26", "package only mentioned in a markdown link", + "react", "README.md", + "See [react](https://react.dev).", + None), + ClassifyCase("U27", "side-effect import with subpath", + "katex", "src/x.css", + '@import "katex/dist/katex.min.css";', + "css_import"), + ClassifyCase("U28", "require.resolve", + "lodash", "build/x.cjs", + 'const path = require.resolve("lodash/fp");', + "require"), +] + + +def run_classify_unit_tests() -> int: + passed = 0 + for c in CLASSIFY_CASES: + actual = classify(c.pkg, c.file, c.content) + ok = actual == c.expected_kind + mark = "PASS" if ok else "FAIL" + print(f" [{mark}] {c.id}: {c.desc}") + if not ok: + print(f" pkg={c.pkg!r} file={c.file!r}") + print(f" content={c.content!r}") + print(f" expected={c.expected_kind!r}, actual={actual!r}") + if ok: + passed += 1 + print() + print(f"{passed}/{len(CLASSIFY_CASES)} classify-unit cases pass") + return 0 if passed == len(CLASSIFY_CASES) else 1 + + +# --------------------------------------------------------------------------- +# Adversarial end-to-end cases: drop a sneaky synthetic file into src/, +# run the checker, then clean up. Catches the case where pattern detection +# regresses for a real grep+classify pipeline (not just classify in isolation). +# --------------------------------------------------------------------------- + +ADVERSARIAL_TMP_DIR = REPO / "studio/frontend/src/__dep_check_adversarial__" + + +@dataclass +class AdvCase: + id: str + desc: str + filename: str + content: str + target_pkg: str + expected_status: str + expected_failures: list[str] + + +ADV_CASES: list[AdvCase] = [ + AdvCase("A01", "multi-line import of removed pkg should FAIL", + "adv01.ts", + 'import {\n foo,\n bar,\n} from "__adv_only_pkg_a__";\n', + "__adv_only_pkg_a__", + "FAIL", ["__adv_only_pkg_a__"]), + AdvCase("A02", "export * from removed pkg should FAIL", + "adv02.ts", + 'export * from "__adv_only_pkg_b__";\n', + "__adv_only_pkg_b__", + "FAIL", ["__adv_only_pkg_b__"]), + AdvCase("A03", "export { x } from removed pkg should FAIL", + "adv03.ts", + 'export { foo, bar } from "__adv_only_pkg_c__";\n', + "__adv_only_pkg_c__", + "FAIL", ["__adv_only_pkg_c__"]), + AdvCase("A04", "export type ... from removed pkg should FAIL", + "adv04.ts", + 'export type { Foo } from "__adv_only_pkg_d__";\n', + "__adv_only_pkg_d__", + "FAIL", ["__adv_only_pkg_d__"]), + AdvCase("A05", "package with similar prefix should NOT trigger FAIL", + "adv05.ts", + # The file imports __adv_only_pkg_e_extra__, but we will try + # to "remove" the shorter __adv_only_pkg_e__ name. The shorter + # name has zero real usage, so removal must be safe. + 'import x from "__adv_only_pkg_e_extra__";\n', + "__adv_only_pkg_e__", + "PASS", []), + AdvCase("A06", "dynamic import of removed pkg should FAIL", + "adv06.ts", + 'const m = await import("__adv_only_pkg_f__");\n', + "__adv_only_pkg_f__", + "FAIL", ["__adv_only_pkg_f__"]), + AdvCase("A07", "new URL of removed pkg should FAIL", + "adv07.ts", + 'const w = new URL("__adv_only_pkg_g__/worker.js", import.meta.url);\n', + "__adv_only_pkg_g__", + "FAIL", ["__adv_only_pkg_g__"]), + AdvCase("A08", "string-concat dynamic import is unanalyzable (PASS)", + "adv08.ts", + 'const m = await import("__adv_only_" + "pkg_h__");\n', + "__adv_only_pkg_h__", + "PASS", []), + AdvCase("A09", "package referenced only inside a JS comment " + "is conservatively flagged via the string_literal fallback " + "(this is acceptable -- err on the side of caution)", + "adv09.ts", + '// TODO: import x from "__adv_only_pkg_i__"\n', + "__adv_only_pkg_i__", + "FAIL", ["__adv_only_pkg_i__"]), + AdvCase("A10", "package referenced only in a Python file should " + "NOT trigger a JS FAIL", + "adv10.py", + 'label = "__adv_only_pkg_j__"\n', + "__adv_only_pkg_j__", + "PASS", []), + AdvCase("A11", "package mentioned in a markdown doc file is " + "ignored by JS-like-only string_literal", + "adv11.md", + 'See [docs](https://example.com/__adv_only_pkg_k__).\n', + "__adv_only_pkg_k__", + "PASS", []), + AdvCase("A12", "JSDoc @import of removed pkg should FAIL", + "adv12.ts", + '/** @type {import("__adv_only_pkg_l__").Foo} */\n' + 'const x = null;\n', + "__adv_only_pkg_l__", + "FAIL", ["__adv_only_pkg_l__"]), +] + + +def run_adversarial_cases() -> int: + ADVERSARIAL_TMP_DIR.mkdir(parents=True, exist_ok=True) + head_pkg = json.loads(HEAD_PKG.read_text()) + passed = 0 + for ac in ADV_CASES: + # Drop the synthetic file. + fpath = ADVERSARIAL_TMP_DIR / ac.filename + try: + fpath.write_text(ac.content) + # Build a synthetic base that has the target pkg added; head + # is the real head (without it). The script sees the pkg as + # removed and scans the repo, which now includes our file. + synth_base = json.loads(json.dumps(head_pkg)) + synth_base.setdefault("dependencies", {})[ac.target_pkg] = "^1.0.0" + with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f: + json.dump(synth_base, f, indent=2) + base_path = f.name + try: + proc = subprocess.run( + [sys.executable, str(SCRIPT), + "--base-pkg", base_path, + "--head-pkg", str(HEAD_PKG), + "--head-lock", str(HEAD_LOCK)], + capture_output=True, text=True, cwd=str(REPO), + ) + finally: + os.unlink(base_path) + actual_status = {0: "PASS", 1: "FAIL"}.get(proc.returncode, f"RC{proc.returncode}") + fails = [] + in_summary = False + for line in proc.stdout.splitlines(): + if "FAIL:" in line and "removed package" in line: + in_summary = True + continue + if in_summary and line.strip().startswith("- "): + fails.append(line.strip()[2:]) + ok = (actual_status == ac.expected_status + and set(fails) == set(ac.expected_failures)) + mark = "PASS" if ok else "FAIL" + print(f" [{mark}] {ac.id}: {ac.desc}") + if not ok: + print(f" expected: status={ac.expected_status} fails={ac.expected_failures}") + print(f" actual: status={actual_status} fails={fails}") + for ln in proc.stdout.splitlines()[:20]: + print(f" {ln}") + if ok: + passed += 1 + finally: + try: + fpath.unlink() + except FileNotFoundError: + pass + # Clean up the directory. + try: + ADVERSARIAL_TMP_DIR.rmdir() + except OSError: + pass + print() + print(f"{passed}/{len(ADV_CASES)} adversarial cases pass") + return 0 if passed == len(ADV_CASES) else 1 + + def main() -> int: head_pkg = json.loads(HEAD_PKG.read_text()) print(f"Running {len(CASES)} edge cases against {SCRIPT.relative_to(REPO)}") @@ -180,7 +499,20 @@ def main() -> int: passed = sum(1 for _, ok, _ in results if ok) total = len(results) print(f"{passed}/{total} edge cases pass") - return 0 if passed == total else 1 + + print() + print(f"Running {len(CLASSIFY_CASES)} classify() unit cases") + print() + cls_rc = run_classify_unit_tests() + + print() + print(f"Running {len(ADV_CASES)} adversarial end-to-end cases") + print() + adv_rc = run_adversarial_cases() + + if passed == total and cls_rc == 0 and adv_rc == 0: + return 0 + return 1 if __name__ == "__main__": From 231d4f12cf9f713dd942421422d9e49fa79f7096 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 16 May 2026 05:06:43 +0000 Subject: [PATCH 03/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/check_frontend_dep_removal.py | 181 +++-- tests/studio/test_frontend_dep_removal.py | 842 +++++++++++++++------- 2 files changed, 696 insertions(+), 327 deletions(-) mode change 100755 => 100644 scripts/check_frontend_dep_removal.py diff --git a/scripts/check_frontend_dep_removal.py b/scripts/check_frontend_dep_removal.py old mode 100755 new mode 100644 index 3f38ac6b4c..1d85e1a435 --- a/scripts/check_frontend_dep_removal.py +++ b/scripts/check_frontend_dep_removal.py @@ -20,6 +20,7 @@ 1 at least one removed dep is referenced and not resolvable 2 invocation error (bad args, missing file, git error) """ + from __future__ import annotations import argparse @@ -56,15 +57,29 @@ ) GREP_INCLUDES = [ - "--include=*.ts", "--include=*.tsx", "--include=*.js", "--include=*.jsx", - "--include=*.mjs", "--include=*.cjs", - "--include=*.html", "--include=*.htm", - "--include=*.css", "--include=*.scss", "--include=*.sass", - "--include=*.json", "--include=*.jsonc", - "--include=*.md", "--include=*.mdx", - "--include=*.py", "--include=*.rs", "--include=*.toml", - "--include=*.yml", "--include=*.yaml", - "--include=*.sh", "--include=*.ps1", "--include=*.bat", + "--include=*.ts", + "--include=*.tsx", + "--include=*.js", + "--include=*.jsx", + "--include=*.mjs", + "--include=*.cjs", + "--include=*.html", + "--include=*.htm", + "--include=*.css", + "--include=*.scss", + "--include=*.sass", + "--include=*.json", + "--include=*.jsonc", + "--include=*.md", + "--include=*.mdx", + "--include=*.py", + "--include=*.rs", + "--include=*.toml", + "--include=*.yml", + "--include=*.yaml", + "--include=*.sh", + "--include=*.ps1", + "--include=*.bat", "--include=Dockerfile*", ] GREP_EXCLUDES = [ @@ -99,8 +114,11 @@ class Hit: def run(cmd: list[str], cwd: Path | None = None) -> str: """Run a command, return stdout. On non-zero exit, return ''.""" res = subprocess.run( - cmd, cwd=cwd or REPO_ROOT, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, text=True, + cmd, + cwd = cwd or REPO_ROOT, + stdout = subprocess.PIPE, + stderr = subprocess.PIPE, + text = True, ) return res.stdout if res.returncode == 0 else "" @@ -212,8 +230,11 @@ def classify(pkg: str, file: str, content: str) -> str | None: # allowing arbitrary content (newlines included) between `import` # and `from`. The non-greedy match plus the required `from` keeps # this scoped to a single statement. - if re.search(rf"(? str | None: return "require" # Re-exports: `export * from "pkg"`, `export { x } from "pkg"`, # `export type { Foo } from "pkg"`. Multi-line supported. - if re.search(rf"\bexport\b[^;'\"]*?\bfrom\s+['\"]{esc}{sub}['\"]", - content, flags_dotall): + if re.search( + rf"\bexport\b[^;'\"]*?\bfrom\s+['\"]{esc}{sub}['\"]", content, flags_dotall + ): return "re_export" # HTML script / link if re.search(rf"]*src\s*=\s*['\"][^'\"]*/{esc}", content): @@ -271,19 +293,25 @@ def lockfile_root_sync(head_pkg: dict, head_lock: dict) -> list[str]: if not head_lock: return warnings root = head_lock.get("packages", {}).get("", {}) - lock_decl = {**(root.get("dependencies") or {}), - **(root.get("devDependencies") or {}), - **(root.get("peerDependencies") or {}), - **(root.get("optionalDependencies") or {})} + lock_decl = { + **(root.get("dependencies") or {}), + **(root.get("devDependencies") or {}), + **(root.get("peerDependencies") or {}), + **(root.get("optionalDependencies") or {}), + } pkg_decl = {} for f in DEP_FIELDS: pkg_decl.update(head_pkg.get(f) or {}) only_in_lock = set(lock_decl) - set(pkg_decl) only_in_pkg = set(pkg_decl) - set(lock_decl) if only_in_lock: - warnings.append(f"lockfile lists deps not in package.json (lockfile stale): {sorted(only_in_lock)}") + warnings.append( + f"lockfile lists deps not in package.json (lockfile stale): {sorted(only_in_lock)}" + ) if only_in_pkg: - warnings.append(f"package.json declares deps not in lockfile (run npm install): {sorted(only_in_pkg)}") + warnings.append( + f"package.json declares deps not in lockfile (run npm install): {sorted(only_in_pkg)}" + ) return warnings @@ -302,14 +330,16 @@ def types_orphan_warnings(head_pkg: dict) -> list[str]: # @types/foo provides types for `foo` # @types/foo-bar provides types for `foo-bar` # @types/scope__pkg provides types for `@scope/pkg` - target = name[len("@types/"):] + target = name[len("@types/") :] if "__" in target: scope, sub = target.split("__", 1) target = f"@{scope}/{sub}" if target == "node": continue # Node.js types are always implicit if target not in decl: - warnings.append(f"@types/{target.replace('@', '').replace('/', '__')} present but '{target}' is not declared") + warnings.append( + f"@types/{target.replace('@', '').replace('/', '__')} present but '{target}' is not declared" + ) return warnings @@ -324,9 +354,16 @@ def find_imports_without_decl(head_pkg: dict) -> list[tuple[str, int, str]]: decl.update((head_pkg.get(f) or {}).keys()) # Also: anything tsconfig path-aliases (just '@/...' here) is internal pattern = r"(?:from|import)\s+['\"]([^'\"./][^'\"]*)['\"]" - args = ["grep", "-rnE", pattern, - "--include=*.ts", "--include=*.tsx", "--include=*.js", "--include=*.jsx", - "studio/frontend/src"] + args = [ + "grep", + "-rnE", + pattern, + "--include=*.ts", + "--include=*.tsx", + "--include=*.js", + "--include=*.jsx", + "studio/frontend/src", + ] out = run(args) missing = [] for line in out.splitlines(): @@ -347,8 +384,19 @@ def find_imports_without_decl(head_pkg: dict) -> list[tuple[str, int, str]]: # Internal aliases like '@/foo' or starts with builtin names if pkg_name == "@": continue - if pkg_name in {"node:fs", "node:path", "fs", "path", "url", "stream", - "crypto", "buffer", "util", "events", "child_process"}: + if pkg_name in { + "node:fs", + "node:path", + "fs", + "path", + "url", + "stream", + "crypto", + "buffer", + "util", + "events", + "child_process", + }: continue missing.append((file, ln, spec)) return missing @@ -371,7 +419,9 @@ def grep_repo(pat: str) -> list[tuple[str, int, str]]: def _read_file(path: str) -> list[str]: if path not in _file_lines_cache: try: - _file_lines_cache[path] = Path(path).read_text(errors="replace").splitlines() + _file_lines_cache[path] = ( + Path(path).read_text(errors = "replace").splitlines() + ) except (OSError, UnicodeDecodeError): _file_lines_cache[path] = [] return _file_lines_cache[path] @@ -409,20 +459,38 @@ def find_usage(pkg: str) -> list[Hit]: def main() -> int: - p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawTextHelpFormatter) - p.add_argument("--base", default="origin/main", - help="git ref to diff against (default: origin/main). " - "Examples: HEAD~1, main, a-tag, a-sha.") - p.add_argument("--base-pkg", help="optional override: read base package.json from this path") - p.add_argument("--base-lock", help="optional override: read base lockfile from this path") - p.add_argument("--head-pkg", default=str(REPO_ROOT / FRONTEND_PKG), - help="head package.json path (default: working tree)") - p.add_argument("--head-lock", default=str(REPO_ROOT / FRONTEND_LOCK), - help="head lockfile path (default: working tree)") - p.add_argument("--verbose", action="store_true") - p.add_argument("--strict", action="store_true", - help="Also fail on hygiene warnings (lockfile sync, " - "@types orphans, imports without declared dep).") + p = argparse.ArgumentParser( + description = __doc__, formatter_class = argparse.RawTextHelpFormatter + ) + p.add_argument( + "--base", + default = "origin/main", + help = "git ref to diff against (default: origin/main). " + "Examples: HEAD~1, main, a-tag, a-sha.", + ) + p.add_argument( + "--base-pkg", help = "optional override: read base package.json from this path" + ) + p.add_argument( + "--base-lock", help = "optional override: read base lockfile from this path" + ) + p.add_argument( + "--head-pkg", + default = str(REPO_ROOT / FRONTEND_PKG), + help = "head package.json path (default: working tree)", + ) + p.add_argument( + "--head-lock", + default = str(REPO_ROOT / FRONTEND_LOCK), + help = "head lockfile path (default: working tree)", + ) + p.add_argument("--verbose", action = "store_true") + p.add_argument( + "--strict", + action = "store_true", + help = "Also fail on hygiene warnings (lockfile sync, " + "@types orphans, imports without declared dep).", + ) args = p.parse_args() if args.base_pkg: @@ -431,10 +499,16 @@ def main() -> int: base_pkg = read_pkg_at(args.base, FRONTEND_PKG) head_pkg = read_pkg_file(Path(args.head_pkg)) if not base_pkg: - print(f"ERROR: could not read base package.json at {args.base}:{FRONTEND_PKG}", file=sys.stderr) + print( + f"ERROR: could not read base package.json at {args.base}:{FRONTEND_PKG}", + file = sys.stderr, + ) return 2 if not head_pkg: - print(f"ERROR: could not read head package.json at {args.head_pkg}", file=sys.stderr) + print( + f"ERROR: could not read head package.json at {args.head_pkg}", + file = sys.stderr, + ) return 2 if args.base_lock: @@ -450,7 +524,9 @@ def main() -> int: print("[OK] no dependencies removed from studio/frontend/package.json") return 0 - print(f"Checking {len(removed)} removed package(s) from studio/frontend/package.json") + print( + f"Checking {len(removed)} removed package(s) from studio/frontend/package.json" + ) print(f"Base: {args.base} Head: working tree") print() @@ -464,7 +540,8 @@ def reachable_install_paths(name: str) -> tuple[str | None, list[str]]: top = f"node_modules/{name}" top_path = top if top in reachable_paths else None nested = sorted( - p for p in reachable_paths + p + for p in reachable_paths if p != top and p.endswith(f"/node_modules/{name}") ) return top_path, nested @@ -487,8 +564,10 @@ def reachable_install_paths(name: str) -> tuple[str | None, list[str]]: if top: print(f" reachable (top-level): {top}") if nested: - print(f" reachable (nested, NOT importable from src/): {nested[0]}" - + (f" (+{len(nested)-1} more)" if len(nested) > 1 else "")) + print( + f" reachable (nested, NOT importable from src/): {nested[0]}" + + (f" (+{len(nested)-1} more)" if len(nested) > 1 else "") + ) if hits: for h in hits[:5]: print(f" [{h.kind}] {h.file}:{h.line} {h.snippet}") @@ -524,7 +603,9 @@ def reachable_install_paths(name: str) -> tuple[str | None, list[str]]: ) if failures: - print(f"FAIL: {len(failures)} removed package(s) still referenced and not resolvable") + print( + f"FAIL: {len(failures)} removed package(s) still referenced and not resolvable" + ) for name, _ in failures: print(f" - {name}") return 1 diff --git a/tests/studio/test_frontend_dep_removal.py b/tests/studio/test_frontend_dep_removal.py index 549223aac6..bcc03d1468 100644 --- a/tests/studio/test_frontend_dep_removal.py +++ b/tests/studio/test_frontend_dep_removal.py @@ -12,6 +12,7 @@ Exits 0 iff every case behaves as expected. """ + from __future__ import annotations import json @@ -39,83 +40,193 @@ class Case: CASES: list[Case] = [ - Case("C1", "removing next-themes breaks 2 src imports", - ["next-themes"], "FAIL", ["next-themes"]), - Case("C2", "removing @xyflow/react breaks recipe-studio src imports " - "(no other declared dep pulls @xyflow/react)", - ["@xyflow/react"], "FAIL", ["@xyflow/react"]), - Case("C3", "removing katex is safe: streamdown/math, mermaid, " - "rehype-katex all keep it at top level", - ["katex"], "PASS", []), - Case("C4", "removing clsx is safe: streamdown keeps it", - ["clsx"], "PASS", []), - Case("C5", "removing react is safe: peer of countless packages", - ["react"], "PASS", []), - Case("C6", "removing @radix-ui/react-slot is safe: pulled by " - "radix-ui umbrella + @assistant-ui/react", - ["@radix-ui/react-slot"], "PASS", []), - Case("C7", "removing zustand is safe: @assistant-ui/react keeps " - "top-level zustand@5.x (nested xyflow 4.x is irrelevant " - "to src imports)", - ["zustand"], "PASS", []), - Case("C8", "multi-remove with mixed safety: next-themes + " - "@huggingface/hub + dexie all unsafe", - ["next-themes", "@huggingface/hub", "dexie"], "FAIL", - ["next-themes", "@huggingface/hub", "dexie"]), - Case("C9", "removing @huggingface/hub breaks 5+ src imports", - ["@huggingface/hub"], "FAIL", ["@huggingface/hub"]), - Case("C10", "removing tailwind-merge is safe: streamdown keeps it", - ["tailwind-merge"], "PASS", []), - Case("C11", "removing a non-existent name is a no-op", - ["__never_existed_in_pkg__"], "PASS", []), - Case("C12", "moving @hugeicons/react from deps to devDeps is NOT a " - "removal (still declared)", - [], "PASS", [], move_to_dev=["@hugeicons/react"]), - Case("C13", "removing @huggingface/hub AND @xyflow/react together: both " - "are root-only deps with no other parents, so both should FAIL", - ["@huggingface/hub", "@xyflow/react"], "FAIL", - ["@huggingface/hub", "@xyflow/react"]), - Case("C14", "removing dexie breaks src imports (no other declared " - "dep needs it)", - ["dexie"], "FAIL", ["dexie"]), - Case("C15", "removing motion (used in 20+ src imports including " - "framer-motion-style animations); no transitive parent", - ["motion"], "FAIL", ["motion"]), - Case("C16", "removing canvas-confetti (imported in confetti.tsx); " - "no transitive parent", - ["canvas-confetti"], "FAIL", ["canvas-confetti"]), - Case("C17", "removing recharts (imported in chart.tsx); no transitive " - "parent", - ["recharts"], "FAIL", ["recharts"]), - Case("C18", "removing js-yaml is safe: @eslint/eslintrc keeps it " - "(triggers @types/js-yaml orphan warning, non-fatal)", - ["js-yaml"], "PASS", []), - Case("C19", "removing node-forge (imported in providers-api.ts); " - "no transitive parent", - ["node-forge"], "FAIL", ["node-forge"]), - Case("C20", "removing @tauri-apps/api is safe: all 5 @tauri-apps " - "plugins declare it as a direct dep", - ["@tauri-apps/api"], "PASS", []), - Case("C21", "removing mammoth (imported in runtime-provider.tsx); " - "no transitive parent", - ["mammoth"], "FAIL", ["mammoth"]), - Case("C22", "removing unpdf (imported in runtime-provider.tsx); " - "no transitive parent", - ["unpdf"], "FAIL", ["unpdf"]), - Case("C23", "removing remark-gfm is safe: streamdown declares it " - "as a direct dep", - ["remark-gfm"], "PASS", []), - Case("C24", "removing date-fns is safe: react-day-picker and " - "@base-ui/react both declare it as a direct dep", - ["date-fns"], "PASS", []), + Case( + "C1", + "removing next-themes breaks 2 src imports", + ["next-themes"], + "FAIL", + ["next-themes"], + ), + Case( + "C2", + "removing @xyflow/react breaks recipe-studio src imports " + "(no other declared dep pulls @xyflow/react)", + ["@xyflow/react"], + "FAIL", + ["@xyflow/react"], + ), + Case( + "C3", + "removing katex is safe: streamdown/math, mermaid, " + "rehype-katex all keep it at top level", + ["katex"], + "PASS", + [], + ), + Case("C4", "removing clsx is safe: streamdown keeps it", ["clsx"], "PASS", []), + Case( + "C5", + "removing react is safe: peer of countless packages", + ["react"], + "PASS", + [], + ), + Case( + "C6", + "removing @radix-ui/react-slot is safe: pulled by " + "radix-ui umbrella + @assistant-ui/react", + ["@radix-ui/react-slot"], + "PASS", + [], + ), + Case( + "C7", + "removing zustand is safe: @assistant-ui/react keeps " + "top-level zustand@5.x (nested xyflow 4.x is irrelevant " + "to src imports)", + ["zustand"], + "PASS", + [], + ), + Case( + "C8", + "multi-remove with mixed safety: next-themes + " + "@huggingface/hub + dexie all unsafe", + ["next-themes", "@huggingface/hub", "dexie"], + "FAIL", + ["next-themes", "@huggingface/hub", "dexie"], + ), + Case( + "C9", + "removing @huggingface/hub breaks 5+ src imports", + ["@huggingface/hub"], + "FAIL", + ["@huggingface/hub"], + ), + Case( + "C10", + "removing tailwind-merge is safe: streamdown keeps it", + ["tailwind-merge"], + "PASS", + [], + ), + Case( + "C11", + "removing a non-existent name is a no-op", + ["__never_existed_in_pkg__"], + "PASS", + [], + ), + Case( + "C12", + "moving @hugeicons/react from deps to devDeps is NOT a " + "removal (still declared)", + [], + "PASS", + [], + move_to_dev = ["@hugeicons/react"], + ), + Case( + "C13", + "removing @huggingface/hub AND @xyflow/react together: both " + "are root-only deps with no other parents, so both should FAIL", + ["@huggingface/hub", "@xyflow/react"], + "FAIL", + ["@huggingface/hub", "@xyflow/react"], + ), + Case( + "C14", + "removing dexie breaks src imports (no other declared " "dep needs it)", + ["dexie"], + "FAIL", + ["dexie"], + ), + Case( + "C15", + "removing motion (used in 20+ src imports including " + "framer-motion-style animations); no transitive parent", + ["motion"], + "FAIL", + ["motion"], + ), + Case( + "C16", + "removing canvas-confetti (imported in confetti.tsx); " "no transitive parent", + ["canvas-confetti"], + "FAIL", + ["canvas-confetti"], + ), + Case( + "C17", + "removing recharts (imported in chart.tsx); no transitive " "parent", + ["recharts"], + "FAIL", + ["recharts"], + ), + Case( + "C18", + "removing js-yaml is safe: @eslint/eslintrc keeps it " + "(triggers @types/js-yaml orphan warning, non-fatal)", + ["js-yaml"], + "PASS", + [], + ), + Case( + "C19", + "removing node-forge (imported in providers-api.ts); " "no transitive parent", + ["node-forge"], + "FAIL", + ["node-forge"], + ), + Case( + "C20", + "removing @tauri-apps/api is safe: all 5 @tauri-apps " + "plugins declare it as a direct dep", + ["@tauri-apps/api"], + "PASS", + [], + ), + Case( + "C21", + "removing mammoth (imported in runtime-provider.tsx); " "no transitive parent", + ["mammoth"], + "FAIL", + ["mammoth"], + ), + Case( + "C22", + "removing unpdf (imported in runtime-provider.tsx); " "no transitive parent", + ["unpdf"], + "FAIL", + ["unpdf"], + ), + Case( + "C23", + "removing remark-gfm is safe: streamdown declares it " "as a direct dep", + ["remark-gfm"], + "PASS", + [], + ), + Case( + "C24", + "removing date-fns is safe: react-day-picker and " + "@base-ui/react both declare it as a direct dep", + ["date-fns"], + "PASS", + [], + ), ] def synth_head(head_pkg: dict, case: Case) -> dict: out = json.loads(json.dumps(head_pkg)) for name in case.remove: - for field in ("dependencies", "devDependencies", - "peerDependencies", "optionalDependencies"): + for field in ( + "dependencies", + "devDependencies", + "peerDependencies", + "optionalDependencies", + ): (out.get(field) or {}).pop(name, None) if case.move_to_dev: for name in case.move_to_dev: @@ -127,16 +238,23 @@ def synth_head(head_pkg: dict, case: Case) -> dict: def run_case(case: Case, head_pkg: dict) -> tuple[bool, str]: synth = synth_head(head_pkg, case) - with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f: - json.dump(synth, f, indent=2) + with tempfile.NamedTemporaryFile("w", suffix = ".json", delete = False) as f: + json.dump(synth, f, indent = 2) synth_path = f.name try: proc = subprocess.run( - [sys.executable, str(SCRIPT), - "--base-pkg", str(HEAD_PKG), - "--head-pkg", synth_path, - "--head-lock", str(HEAD_LOCK)], - capture_output=True, text=True, + [ + sys.executable, + str(SCRIPT), + "--base-pkg", + str(HEAD_PKG), + "--head-pkg", + synth_path, + "--head-lock", + str(HEAD_LOCK), + ], + capture_output = True, + text = True, ) finally: os.unlink(synth_path) @@ -151,15 +269,13 @@ def run_case(case: Case, head_pkg: dict) -> tuple[bool, str]: if in_summary and line.strip().startswith("- "): failure_pkgs.append(line.strip()[2:]) - ok = ( - actual_status == case.expected_status - and set(failure_pkgs) == set(case.expected_failures) + ok = actual_status == case.expected_status and set(failure_pkgs) == set( + case.expected_failures ) return ok, ( f"expected: status={case.expected_status} fails={sorted(case.expected_failures)}\n" f"actual: status={actual_status} fails={sorted(failure_pkgs)}\n" - f"--- stdout (first 30 lines) ---\n" - + "\n".join(proc.stdout.splitlines()[:30]) + f"--- stdout (first 30 lines) ---\n" + "\n".join(proc.stdout.splitlines()[:30]) ) @@ -172,6 +288,7 @@ def run_case(case: Case, head_pkg: dict) -> tuple[bool, str]: # Import the script's classify() by file path so this test does not need # the package to be installed. import importlib.util as _ilu + _spec = _ilu.spec_from_file_location("_dep_check", str(SCRIPT)) _dep_check = _ilu.module_from_spec(_spec) sys.modules["_dep_check"] = _dep_check # required so @dataclass can resolve annotations @@ -191,122 +308,234 @@ class ClassifyCase: CLASSIFY_CASES: list[ClassifyCase] = [ # Bog-standard shapes - ClassifyCase("U01", "single-line static import", - "next-themes", "src/x.tsx", - 'import { ThemeProvider } from "next-themes";', - "static_import"), - ClassifyCase("U02", "side-effect import", - "katex", "src/x.tsx", - 'import "katex/dist/katex.min.css";', - "side_effect_import"), - ClassifyCase("U03", "dynamic import", - "@tauri-apps/api", "src/x.tsx", - 'const { x } = await import("@tauri-apps/api/window");', - "dynamic_import"), - ClassifyCase("U04", "require()", - "lodash", "src/x.js", - 'const _ = require("lodash");', - "require"), - ClassifyCase("U05", "CSS @import", - "tailwindcss", "src/x.css", - '@import "tailwindcss";', - "css_import"), + ClassifyCase( + "U01", + "single-line static import", + "next-themes", + "src/x.tsx", + 'import { ThemeProvider } from "next-themes";', + "static_import", + ), + ClassifyCase( + "U02", + "side-effect import", + "katex", + "src/x.tsx", + 'import "katex/dist/katex.min.css";', + "side_effect_import", + ), + ClassifyCase( + "U03", + "dynamic import", + "@tauri-apps/api", + "src/x.tsx", + 'const { x } = await import("@tauri-apps/api/window");', + "dynamic_import", + ), + ClassifyCase( + "U04", + "require()", + "lodash", + "src/x.js", + 'const _ = require("lodash");', + "require", + ), + ClassifyCase( + "U05", + "CSS @import", + "tailwindcss", + "src/x.css", + '@import "tailwindcss";', + "css_import", + ), # Sneaky shapes - ClassifyCase("U06", "multi-line static import", - "next-themes", "src/x.tsx", - 'import {\n ThemeProvider,\n useTheme,\n} from "next-themes";', - "static_import"), - ClassifyCase("U07", "import type", - "@huggingface/hub", "src/x.ts", - 'import type { PipelineType } from "@huggingface/hub";', - "static_import"), - ClassifyCase("U08", "export * from re-export", - "@some-org/secrets", "src/x.ts", - 'export * from "@some-org/secrets";', - "re_export"), - ClassifyCase("U09", "export { x } from re-export", - "lodash-es", "src/x.ts", - 'export { foo, bar } from "lodash-es";', - "re_export"), - ClassifyCase("U10", "export type ... from re-export", - "@huggingface/hub", "src/x.ts", - 'export type { Foo } from "@huggingface/hub";', - "re_export"), - ClassifyCase("U11", "multi-line export from re-export", - "@some/pkg", "src/x.ts", - 'export {\n thing,\n other,\n} from "@some/pkg";', - "re_export"), - ClassifyCase("U12", "JSDoc @import", - "react", "src/x.ts", - '/** @type {import("react").FC} */\nconst Foo = () => null;', - "dynamic_import"), - ClassifyCase("U13", "template literal package path", - "@assistant-ui/react", "src/x.tsx", - 'const url = `@assistant-ui/react`;', - "template_literal"), - ClassifyCase("U14", "new URL import-meta", - "monaco-editor", "src/x.ts", - 'new URL("monaco-editor/esm/vs/editor/editor.worker", import.meta.url);', - "new_url"), - ClassifyCase("U15", "tsc triple-slash type ref", - "@types/some-pkg", "src/x.ts", - '/// ', - "tsc_triple_slash"), - ClassifyCase("U16", "HTML script src", - "alpinejs", "index.html", - '', - "html_script"), - ClassifyCase("U17", "HTML link href", - "alpinejs", "index.html", - '', - "html_link"), - ClassifyCase("U18", "bare quoted string in tsconfig paths", - "@huggingface/hub", "tsconfig.json", - '"paths": { "hf": ["@huggingface/hub/*"] }', - "string_literal"), - ClassifyCase("U19", "vite alias key", - "@dagrejs/dagre", "vite.config.ts", - '"@dagrejs/dagre": path.resolve(__dirname, "./..."),', - "string_literal"), + ClassifyCase( + "U06", + "multi-line static import", + "next-themes", + "src/x.tsx", + 'import {\n ThemeProvider,\n useTheme,\n} from "next-themes";', + "static_import", + ), + ClassifyCase( + "U07", + "import type", + "@huggingface/hub", + "src/x.ts", + 'import type { PipelineType } from "@huggingface/hub";', + "static_import", + ), + ClassifyCase( + "U08", + "export * from re-export", + "@some-org/secrets", + "src/x.ts", + 'export * from "@some-org/secrets";', + "re_export", + ), + ClassifyCase( + "U09", + "export { x } from re-export", + "lodash-es", + "src/x.ts", + 'export { foo, bar } from "lodash-es";', + "re_export", + ), + ClassifyCase( + "U10", + "export type ... from re-export", + "@huggingface/hub", + "src/x.ts", + 'export type { Foo } from "@huggingface/hub";', + "re_export", + ), + ClassifyCase( + "U11", + "multi-line export from re-export", + "@some/pkg", + "src/x.ts", + 'export {\n thing,\n other,\n} from "@some/pkg";', + "re_export", + ), + ClassifyCase( + "U12", + "JSDoc @import", + "react", + "src/x.ts", + '/** @type {import("react").FC} */\nconst Foo = () => null;', + "dynamic_import", + ), + ClassifyCase( + "U13", + "template literal package path", + "@assistant-ui/react", + "src/x.tsx", + "const url = `@assistant-ui/react`;", + "template_literal", + ), + ClassifyCase( + "U14", + "new URL import-meta", + "monaco-editor", + "src/x.ts", + 'new URL("monaco-editor/esm/vs/editor/editor.worker", import.meta.url);', + "new_url", + ), + ClassifyCase( + "U15", + "tsc triple-slash type ref", + "@types/some-pkg", + "src/x.ts", + '/// ', + "tsc_triple_slash", + ), + ClassifyCase( + "U16", + "HTML script src", + "alpinejs", + "index.html", + '', + "html_script", + ), + ClassifyCase( + "U17", + "HTML link href", + "alpinejs", + "index.html", + '', + "html_link", + ), + ClassifyCase( + "U18", + "bare quoted string in tsconfig paths", + "@huggingface/hub", + "tsconfig.json", + '"paths": { "hf": ["@huggingface/hub/*"] }', + "string_literal", + ), + ClassifyCase( + "U19", + "vite alias key", + "@dagrejs/dagre", + "vite.config.ts", + '"@dagrejs/dagre": path.resolve(__dirname, "./..."),', + "string_literal", + ), # False-positive guards (these should NOT detect) - ClassifyCase("U20", "different package with shared prefix", - "foo", "src/x.ts", - 'import { x } from "foobar";', - None), - ClassifyCase("U21", "package mentioned in plain comment text", - "react", "src/x.ts", - '// We migrated from react-router to tanstack-router', - None), - ClassifyCase("U22", "package name as a URL path tail is NOT detected " - "(boundary rule: pkg must be followed by quote or `/`)", - "react", "src/x.ts", - 'const docs = "https://example.com/react";', - None), - ClassifyCase("U23", "package name in Python file (ignored, " - "Python can never import npm packages)", - "playwright", "tests/x.py", - 'label: str = "playwright"', - None), - ClassifyCase("U24", "exact-prefix collision: pkg 'lodash' and 'lodash-es'", - "lodash", "src/x.ts", - 'import _ from "lodash-es";', - None), - ClassifyCase("U25", "scoped pkg substring collision", - "@radix-ui/react-label", "src/x.ts", - 'import x from "@radix-ui/react-label-extra";', - None), - ClassifyCase("U26", "package only mentioned in a markdown link", - "react", "README.md", - "See [react](https://react.dev).", - None), - ClassifyCase("U27", "side-effect import with subpath", - "katex", "src/x.css", - '@import "katex/dist/katex.min.css";', - "css_import"), - ClassifyCase("U28", "require.resolve", - "lodash", "build/x.cjs", - 'const path = require.resolve("lodash/fp");', - "require"), + ClassifyCase( + "U20", + "different package with shared prefix", + "foo", + "src/x.ts", + 'import { x } from "foobar";', + None, + ), + ClassifyCase( + "U21", + "package mentioned in plain comment text", + "react", + "src/x.ts", + "// We migrated from react-router to tanstack-router", + None, + ), + ClassifyCase( + "U22", + "package name as a URL path tail is NOT detected " + "(boundary rule: pkg must be followed by quote or `/`)", + "react", + "src/x.ts", + 'const docs = "https://example.com/react";', + None, + ), + ClassifyCase( + "U23", + "package name in Python file (ignored, " + "Python can never import npm packages)", + "playwright", + "tests/x.py", + 'label: str = "playwright"', + None, + ), + ClassifyCase( + "U24", + "exact-prefix collision: pkg 'lodash' and 'lodash-es'", + "lodash", + "src/x.ts", + 'import _ from "lodash-es";', + None, + ), + ClassifyCase( + "U25", + "scoped pkg substring collision", + "@radix-ui/react-label", + "src/x.ts", + 'import x from "@radix-ui/react-label-extra";', + None, + ), + ClassifyCase( + "U26", + "package only mentioned in a markdown link", + "react", + "README.md", + "See [react](https://react.dev).", + None, + ), + ClassifyCase( + "U27", + "side-effect import with subpath", + "katex", + "src/x.css", + '@import "katex/dist/katex.min.css";', + "css_import", + ), + ClassifyCase( + "U28", + "require.resolve", + "lodash", + "build/x.cjs", + 'const path = require.resolve("lodash/fp");', + "require", + ), ] @@ -349,79 +578,125 @@ class AdvCase: ADV_CASES: list[AdvCase] = [ - AdvCase("A01", "multi-line import of removed pkg should FAIL", - "adv01.ts", - 'import {\n foo,\n bar,\n} from "__adv_only_pkg_a__";\n', - "__adv_only_pkg_a__", - "FAIL", ["__adv_only_pkg_a__"]), - AdvCase("A02", "export * from removed pkg should FAIL", - "adv02.ts", - 'export * from "__adv_only_pkg_b__";\n', - "__adv_only_pkg_b__", - "FAIL", ["__adv_only_pkg_b__"]), - AdvCase("A03", "export { x } from removed pkg should FAIL", - "adv03.ts", - 'export { foo, bar } from "__adv_only_pkg_c__";\n', - "__adv_only_pkg_c__", - "FAIL", ["__adv_only_pkg_c__"]), - AdvCase("A04", "export type ... from removed pkg should FAIL", - "adv04.ts", - 'export type { Foo } from "__adv_only_pkg_d__";\n', - "__adv_only_pkg_d__", - "FAIL", ["__adv_only_pkg_d__"]), - AdvCase("A05", "package with similar prefix should NOT trigger FAIL", - "adv05.ts", - # The file imports __adv_only_pkg_e_extra__, but we will try - # to "remove" the shorter __adv_only_pkg_e__ name. The shorter - # name has zero real usage, so removal must be safe. - 'import x from "__adv_only_pkg_e_extra__";\n', - "__adv_only_pkg_e__", - "PASS", []), - AdvCase("A06", "dynamic import of removed pkg should FAIL", - "adv06.ts", - 'const m = await import("__adv_only_pkg_f__");\n', - "__adv_only_pkg_f__", - "FAIL", ["__adv_only_pkg_f__"]), - AdvCase("A07", "new URL of removed pkg should FAIL", - "adv07.ts", - 'const w = new URL("__adv_only_pkg_g__/worker.js", import.meta.url);\n', - "__adv_only_pkg_g__", - "FAIL", ["__adv_only_pkg_g__"]), - AdvCase("A08", "string-concat dynamic import is unanalyzable (PASS)", - "adv08.ts", - 'const m = await import("__adv_only_" + "pkg_h__");\n', - "__adv_only_pkg_h__", - "PASS", []), - AdvCase("A09", "package referenced only inside a JS comment " - "is conservatively flagged via the string_literal fallback " - "(this is acceptable -- err on the side of caution)", - "adv09.ts", - '// TODO: import x from "__adv_only_pkg_i__"\n', - "__adv_only_pkg_i__", - "FAIL", ["__adv_only_pkg_i__"]), - AdvCase("A10", "package referenced only in a Python file should " - "NOT trigger a JS FAIL", - "adv10.py", - 'label = "__adv_only_pkg_j__"\n', - "__adv_only_pkg_j__", - "PASS", []), - AdvCase("A11", "package mentioned in a markdown doc file is " - "ignored by JS-like-only string_literal", - "adv11.md", - 'See [docs](https://example.com/__adv_only_pkg_k__).\n', - "__adv_only_pkg_k__", - "PASS", []), - AdvCase("A12", "JSDoc @import of removed pkg should FAIL", - "adv12.ts", - '/** @type {import("__adv_only_pkg_l__").Foo} */\n' - 'const x = null;\n', - "__adv_only_pkg_l__", - "FAIL", ["__adv_only_pkg_l__"]), + AdvCase( + "A01", + "multi-line import of removed pkg should FAIL", + "adv01.ts", + 'import {\n foo,\n bar,\n} from "__adv_only_pkg_a__";\n', + "__adv_only_pkg_a__", + "FAIL", + ["__adv_only_pkg_a__"], + ), + AdvCase( + "A02", + "export * from removed pkg should FAIL", + "adv02.ts", + 'export * from "__adv_only_pkg_b__";\n', + "__adv_only_pkg_b__", + "FAIL", + ["__adv_only_pkg_b__"], + ), + AdvCase( + "A03", + "export { x } from removed pkg should FAIL", + "adv03.ts", + 'export { foo, bar } from "__adv_only_pkg_c__";\n', + "__adv_only_pkg_c__", + "FAIL", + ["__adv_only_pkg_c__"], + ), + AdvCase( + "A04", + "export type ... from removed pkg should FAIL", + "adv04.ts", + 'export type { Foo } from "__adv_only_pkg_d__";\n', + "__adv_only_pkg_d__", + "FAIL", + ["__adv_only_pkg_d__"], + ), + AdvCase( + "A05", + "package with similar prefix should NOT trigger FAIL", + "adv05.ts", + # The file imports __adv_only_pkg_e_extra__, but we will try + # to "remove" the shorter __adv_only_pkg_e__ name. The shorter + # name has zero real usage, so removal must be safe. + 'import x from "__adv_only_pkg_e_extra__";\n', + "__adv_only_pkg_e__", + "PASS", + [], + ), + AdvCase( + "A06", + "dynamic import of removed pkg should FAIL", + "adv06.ts", + 'const m = await import("__adv_only_pkg_f__");\n', + "__adv_only_pkg_f__", + "FAIL", + ["__adv_only_pkg_f__"], + ), + AdvCase( + "A07", + "new URL of removed pkg should FAIL", + "adv07.ts", + 'const w = new URL("__adv_only_pkg_g__/worker.js", import.meta.url);\n', + "__adv_only_pkg_g__", + "FAIL", + ["__adv_only_pkg_g__"], + ), + AdvCase( + "A08", + "string-concat dynamic import is unanalyzable (PASS)", + "adv08.ts", + 'const m = await import("__adv_only_" + "pkg_h__");\n', + "__adv_only_pkg_h__", + "PASS", + [], + ), + AdvCase( + "A09", + "package referenced only inside a JS comment " + "is conservatively flagged via the string_literal fallback " + "(this is acceptable -- err on the side of caution)", + "adv09.ts", + '// TODO: import x from "__adv_only_pkg_i__"\n', + "__adv_only_pkg_i__", + "FAIL", + ["__adv_only_pkg_i__"], + ), + AdvCase( + "A10", + "package referenced only in a Python file should " "NOT trigger a JS FAIL", + "adv10.py", + 'label = "__adv_only_pkg_j__"\n', + "__adv_only_pkg_j__", + "PASS", + [], + ), + AdvCase( + "A11", + "package mentioned in a markdown doc file is " + "ignored by JS-like-only string_literal", + "adv11.md", + "See [docs](https://example.com/__adv_only_pkg_k__).\n", + "__adv_only_pkg_k__", + "PASS", + [], + ), + AdvCase( + "A12", + "JSDoc @import of removed pkg should FAIL", + "adv12.ts", + '/** @type {import("__adv_only_pkg_l__").Foo} */\n' "const x = null;\n", + "__adv_only_pkg_l__", + "FAIL", + ["__adv_only_pkg_l__"], + ), ] def run_adversarial_cases() -> int: - ADVERSARIAL_TMP_DIR.mkdir(parents=True, exist_ok=True) + ADVERSARIAL_TMP_DIR.mkdir(parents = True, exist_ok = True) head_pkg = json.loads(HEAD_PKG.read_text()) passed = 0 for ac in ADV_CASES: @@ -434,20 +709,30 @@ def run_adversarial_cases() -> int: # removed and scans the repo, which now includes our file. synth_base = json.loads(json.dumps(head_pkg)) synth_base.setdefault("dependencies", {})[ac.target_pkg] = "^1.0.0" - with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f: - json.dump(synth_base, f, indent=2) + with tempfile.NamedTemporaryFile("w", suffix = ".json", delete = False) as f: + json.dump(synth_base, f, indent = 2) base_path = f.name try: proc = subprocess.run( - [sys.executable, str(SCRIPT), - "--base-pkg", base_path, - "--head-pkg", str(HEAD_PKG), - "--head-lock", str(HEAD_LOCK)], - capture_output=True, text=True, cwd=str(REPO), + [ + sys.executable, + str(SCRIPT), + "--base-pkg", + base_path, + "--head-pkg", + str(HEAD_PKG), + "--head-lock", + str(HEAD_LOCK), + ], + capture_output = True, + text = True, + cwd = str(REPO), ) finally: os.unlink(base_path) - actual_status = {0: "PASS", 1: "FAIL"}.get(proc.returncode, f"RC{proc.returncode}") + actual_status = {0: "PASS", 1: "FAIL"}.get( + proc.returncode, f"RC{proc.returncode}" + ) fails = [] in_summary = False for line in proc.stdout.splitlines(): @@ -456,12 +741,15 @@ def run_adversarial_cases() -> int: continue if in_summary and line.strip().startswith("- "): fails.append(line.strip()[2:]) - ok = (actual_status == ac.expected_status - and set(fails) == set(ac.expected_failures)) + ok = actual_status == ac.expected_status and set(fails) == set( + ac.expected_failures + ) mark = "PASS" if ok else "FAIL" print(f" [{mark}] {ac.id}: {ac.desc}") if not ok: - print(f" expected: status={ac.expected_status} fails={ac.expected_failures}") + print( + f" expected: status={ac.expected_status} fails={ac.expected_failures}" + ) print(f" actual: status={actual_status} fails={fails}") for ln in proc.stdout.splitlines()[:20]: print(f" {ln}") From 52cf06bd6609dd94c50cdc6d7a03e4cef0fee0e4 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sat, 16 May 2026 05:17:10 +0000 Subject: [PATCH 04/14] scripts: detect bin references in package.json scripts Catches the last common false-negative: removing a package whose bin is only referenced through `package.json` scripts (e.g. dropping typescript while `"build": "tsc -b && vite build"` calls tsc). Cross-checked the patterns Vercel/Next.js, Vite, and TanStack use in their own manifests; the bin/scripts pairing is the one consumer-side pattern dep checkers commonly miss. How it works: - Build a bin-to-package map from each lockfile entry's `bin` field. The map is global so a stale lockfile still resolves bins from packages about to be pruned. - Tokenize each script value, splitting on `&&`, `||`, `;`, `|`. Strip env-var assignments and `npx / pnpx / yarn / pnpm / bunx` prefixes, plus `./node_modules/.bin/` and `node_modules/.bin/` path prefixes. Look up the leading token in the bin map. - Hits are reported as `script_bin` and feed the same reachability gate as source imports. A bin still installed transitively (e.g. vite via @vitejs/plugin-react peer) is OK-via-transitive; an orphaned bin is FAIL. Test additions: - 5 new edge cases: removing vite, typescript, eslint, @biomejs/biome, and (@biomejs/biome + @vitejs/plugin-react) together. Correctly flags @biomejs/biome and the combo as FAIL while vite / typescript / eslint are kept by peers. - 8 new classify() unit cases: TypeScript ambient `declare module`, namespace imports, combined default+named, default-as-named, re-export default (4 forms), `.then()` dynamic imports without await, and TypeScript `import()` in type position. Current total: 29 edge + 36 classify-unit + 12 adversarial = 77 / 77. --- scripts/check_frontend_dep_removal.py | 58 ++++++++++++ tests/studio/test_frontend_dep_removal.py | 105 ++++++++++++++++++++++ 2 files changed, 163 insertions(+) diff --git a/scripts/check_frontend_dep_removal.py b/scripts/check_frontend_dep_removal.py index 1d85e1a435..52851a888a 100644 --- a/scripts/check_frontend_dep_removal.py +++ b/scripts/check_frontend_dep_removal.py @@ -343,6 +343,60 @@ def types_orphan_warnings(head_pkg: dict) -> list[str]: return warnings +def build_bin_to_pkg(head_lock: dict) -> dict[str, str]: + """Map a binary name (e.g. 'vite', 'tsc', 'eslint') to the package + that provides it. Built from each lockfile entry's `bin` field. + """ + out: dict[str, str] = {} + if not head_lock: + return out + for path, meta in head_lock.get("packages", {}).items(): + if not path: + continue + name = path.split("node_modules/")[-1] + bins = meta.get("bin") + if isinstance(bins, dict): + for binname in bins: + out.setdefault(binname, name) + elif isinstance(bins, str): + out.setdefault(name.split("/")[-1], name) + return out + + +_SCRIPT_TOKENIZE = re.compile(r"\s*(?:&&|\|\||;|\|(?!\|))\s*") + + +def scripts_bin_refs(head_pkg: dict, bin_to_pkg: dict[str, str]) -> dict[str, list[str]]: + """Return `{package_name: ['scripts.X: cmd', ...]}` listing every + package referenced via its bin name in package.json scripts. + """ + scripts = head_pkg.get("scripts", {}) or {} + refs: dict[str, list[str]] = {} + for script_name, raw_cmd in scripts.items(): + if not isinstance(raw_cmd, str): + continue + for chunk in _SCRIPT_TOKENIZE.split(raw_cmd): + chunk = chunk.strip() + if not chunk: + continue + words = chunk.split() + idx = 0 + while idx < len(words) and re.match(r"^[A-Za-z_][A-Za-z0-9_]*=", words[idx]): + idx += 1 + if idx >= len(words): + continue + first = words[idx] + if first in {"npx", "pnpx", "yarn", "pnpm", "bunx"} and idx + 1 < len(words): + idx += 1 + first = words[idx] + first = first.removeprefix("./node_modules/.bin/") + first = first.removeprefix("node_modules/.bin/") + pkg = bin_to_pkg.get(first) + if pkg: + refs.setdefault(pkg, []).append(f"scripts.{script_name}: {raw_cmd}") + return refs + + def find_imports_without_decl(head_pkg: dict) -> list[tuple[str, int, str]]: """Reverse check: find bare-specifier imports in studio/frontend/src that don't correspond to any declared package.json dep. Catches the @@ -531,6 +585,8 @@ def main() -> int: print() reachable_paths = reachable_from_head(head_pkg, head_lock) if head_lock else set() + bin_to_pkg = build_bin_to_pkg(head_lock) if head_lock else {} + script_refs = scripts_bin_refs(head_pkg, bin_to_pkg) def reachable_install_paths(name: str) -> tuple[str | None, list[str]]: """Return (top_level_path, nested_paths). top_level is what bare @@ -549,6 +605,8 @@ def reachable_install_paths(name: str) -> tuple[str | None, list[str]]: failures: list[tuple[str, list[Hit]]] = [] for name in removed: hits = find_usage(name) + for cite in script_refs.get(name, []): + hits.append(Hit("studio/frontend/package.json", 0, "script_bin", cite)) top, nested = reachable_install_paths(name) importable_top_level = top is not None # Source imports of bare specifier `name` resolve ONLY to top-level diff --git a/tests/studio/test_frontend_dep_removal.py b/tests/studio/test_frontend_dep_removal.py index bcc03d1468..256534734a 100644 --- a/tests/studio/test_frontend_dep_removal.py +++ b/tests/studio/test_frontend_dep_removal.py @@ -215,6 +215,47 @@ class Case: "PASS", [], ), + Case( + "C25", + "removing vite is safe: @vitejs/plugin-react and @tailwindcss/vite " + "keep it via peer (bin still resolves)", + ["vite"], + "PASS", + [], + ), + Case( + "C26", + "removing typescript is safe: 11 transitive @typescript-eslint/* " + "parents keep tsc bin alive", + ["typescript"], + "PASS", + [], + ), + Case( + "C27", + "removing eslint is safe: typescript-eslint and eslint-plugin-* " + "peers keep eslint bin alive", + ["eslint"], + "PASS", + [], + ), + Case( + "C28", + "removing @biomejs/biome breaks scripts.biome:check / biome:fix " + "(no transitive parents, biome bin orphans)", + ["@biomejs/biome"], + "FAIL", + ["@biomejs/biome"], + ), + Case( + "C29", + "removing both @biomejs/biome AND @vitejs/plugin-react together: " + "biome dies outright; vite loses one of its two retained peers " + "but @tailwindcss/vite still keeps it", + ["@biomejs/biome", "@vitejs/plugin-react"], + "FAIL", + ["@biomejs/biome", "@vitejs/plugin-react"], + ), ] @@ -536,6 +577,70 @@ class ClassifyCase: 'const path = require.resolve("lodash/fp");', "require", ), + ClassifyCase( + "U29", + "TypeScript ambient `declare module`", + "@tanstack/react-router", + "src/app/router.tsx", + 'declare module "@tanstack/react-router" {\n interface X {}\n}', + "string_literal", + ), + ClassifyCase( + "U30", + "namespace import `import * as X from pkg`", + "@radix-ui/react-slot", + "src/x.tsx", + 'import * as Slot from "@radix-ui/react-slot";', + "static_import", + ), + ClassifyCase( + "U31", + "combined default + named import", + "react", + "src/x.tsx", + 'import React, { useState } from "react";', + "static_import", + ), + ClassifyCase( + "U32", + "default-as-named import alias", + "react", + "src/x.tsx", + 'import { default as R } from "react";', + "static_import", + ), + ClassifyCase( + "U33", + "re-export default", + "lodash", + "src/x.ts", + 'export { default } from "lodash";', + "re_export", + ), + ClassifyCase( + "U34", + "re-export default as alias", + "lodash", + "src/x.ts", + 'export { default as _ } from "lodash";', + "re_export", + ), + ClassifyCase( + "U35", + ".then() dynamic import (no await)", + "@tauri-apps/api", + "src/x.ts", + 'import("@tauri-apps/api/window").then(m => m.x());', + "dynamic_import", + ), + ClassifyCase( + "U36", + "TypeScript import() in type position", + "react", + "src/x.ts", + 'type C = import("react").ComponentType;', + "dynamic_import", + ), ] From 17705256c389caddaaf1aabdf491dc72ede4ab7c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 16 May 2026 05:17:26 +0000 Subject: [PATCH 05/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/check_frontend_dep_removal.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/scripts/check_frontend_dep_removal.py b/scripts/check_frontend_dep_removal.py index 52851a888a..3725fe07a1 100644 --- a/scripts/check_frontend_dep_removal.py +++ b/scripts/check_frontend_dep_removal.py @@ -366,7 +366,9 @@ def build_bin_to_pkg(head_lock: dict) -> dict[str, str]: _SCRIPT_TOKENIZE = re.compile(r"\s*(?:&&|\|\||;|\|(?!\|))\s*") -def scripts_bin_refs(head_pkg: dict, bin_to_pkg: dict[str, str]) -> dict[str, list[str]]: +def scripts_bin_refs( + head_pkg: dict, bin_to_pkg: dict[str, str] +) -> dict[str, list[str]]: """Return `{package_name: ['scripts.X: cmd', ...]}` listing every package referenced via its bin name in package.json scripts. """ @@ -381,12 +383,16 @@ def scripts_bin_refs(head_pkg: dict, bin_to_pkg: dict[str, str]) -> dict[str, li continue words = chunk.split() idx = 0 - while idx < len(words) and re.match(r"^[A-Za-z_][A-Za-z0-9_]*=", words[idx]): + while idx < len(words) and re.match( + r"^[A-Za-z_][A-Za-z0-9_]*=", words[idx] + ): idx += 1 if idx >= len(words): continue first = words[idx] - if first in {"npx", "pnpx", "yarn", "pnpm", "bunx"} and idx + 1 < len(words): + if first in {"npx", "pnpx", "yarn", "pnpm", "bunx"} and idx + 1 < len( + words + ): idx += 1 first = words[idx] first = first.removeprefix("./node_modules/.bin/") From d8c48404edcc4c95408c7fb3b87a7e329fc804bb Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sat, 16 May 2026 05:24:45 +0000 Subject: [PATCH 06/14] scripts: detect package.json field references to packages After surveying package.json patterns in 10+ popular repos (React, Vue/Svelte/Astro/Next.js, Vite, Storybook, TanStack/Query, Tailwind, ESLint, TypeScript, Prettier, SvelteKit), several config fields in package.json itself can reference packages by string. My checker filtered all of package.json out of the string_literal fallback, so removing a package that is only referenced from one of these fields was a false negative. Now covered (new pkg_json_field kind): - overrides / resolutions / pnpm.overrides keys - pnpm.patchedDependencies keys - peerDependenciesMeta keys - prettier: "@my/prettier-config" string - eslintConfig.extends (string or array) - stylelint.extends / stylelint.plugins - babel.presets / babel.plugins - jest.preset / jest.setupFiles / jest.transform - commitlint.extends - renovate.extends - remarkConfig.plugins - any other tool config field whose strings/keys equal the pkg name or `pkg/subpath` False-positive guards (do not flag string values inside): - browserslist (browser queries) - keywords (free-form strings) - engines / engineStrict / packageManager / volta (version pins) - files / directories / publishConfig (paths) - workspaces (paths/globs) - main / module / browser / types / typings / exports / imports / bin / man (author-side fields) - scripts (already handled separately via scripts_bin_refs) - name / version / description / author / repository / homepage etc. Test additions: new PkgFieldCase suite with 19 cases covering each tool config field, subpath references, and the 5 false-positive guards. Combined with the existing 29 edge / 36 classify / 12 adversarial cases, the suite is 96 / 96. --- scripts/check_frontend_dep_removal.py | 81 ++++++++ tests/studio/test_frontend_dep_removal.py | 242 +++++++++++++++++++++- 2 files changed, 322 insertions(+), 1 deletion(-) diff --git a/scripts/check_frontend_dep_removal.py b/scripts/check_frontend_dep_removal.py index 3725fe07a1..c51e2a872d 100644 --- a/scripts/check_frontend_dep_removal.py +++ b/scripts/check_frontend_dep_removal.py @@ -343,6 +343,85 @@ def types_orphan_warnings(head_pkg: dict) -> list[str]: return warnings +_PKG_JSON_SKIP_KEYS = { + "dependencies", + "devDependencies", + "peerDependencies", + "optionalDependencies", + "bundleDependencies", + "bundledDependencies", +} + +# Top-level fields whose contents are never package references. We walk +# everything else recursively. +_PKG_JSON_OPAQUE_KEYS = { + "browserslist", # browser queries + "keywords", # free-form strings + "engines", # node/npm version constraints + "engineStrict", # bool + "packageManager", # `pnpm@9.0.0` -- the package manager binary + "volta", # version pins for node/npm/yarn + "files", # paths included in publish + "directories", # paths + "publishConfig", # registry / access config + "config", # generic npm config values + "main", "module", "browser", "types", "typings", "type", "exports", + "imports", "bin", "man", # author-side fields (not consumer refs) + "scripts", # handled separately via scripts_bin_refs() + "repository", "bugs", "homepage", "funding", + "author", "contributors", "maintainers", "license", "licenses", + "name", "version", "description", "private", "sideEffects", + "workspaces", # paths/globs, NOT pkg names +} + + +def package_json_extra_refs(pkg: dict, target: str) -> list[str]: + """Walk every key/value in package.json EXCEPT the dep declaration + blocks, and return citations for string values or dict keys that + equal `target` (or `target/subpath`). + + Catches the patterns the public dep-checker tools commonly miss: + - `overrides` / `resolutions` / `pnpm.overrides` keys + - `pnpm.patchedDependencies` keys + - `peerDependenciesMeta` keys + - `prettier`: "@my/prettier-config" + - `eslintConfig.extends`: ["..."] / "..." + - `stylelint.extends` / `stylelint.plugins` + - `babel.presets` / `babel.plugins` + - `jest.preset` / `jest.setupFiles` / `jest.transform` + - `commitlint.extends`, `renovate.extends`, `remarkConfig.plugins` + """ + target_sub = target + "/" + cites: list[str] = [] + + def matches(s: object) -> bool: + return isinstance(s, str) and (s == target or s.startswith(target_sub)) + + def walk(obj: object, path: str) -> None: + if isinstance(obj, dict): + for k, v in obj.items(): + # Skip top-level dep declaration fields entirely. + if path == "" and k in _PKG_JSON_SKIP_KEYS: + continue + # Top-level fields whose contents are never package refs. + if path == "" and k in _PKG_JSON_OPAQUE_KEYS: + continue + # Inside `overrides` / `resolutions` / etc., the KEY itself + # is a package reference. + if matches(k): + cites.append(f"{path}.{k}" if path else k) + walk(v, f"{path}.{k}" if path else k) + elif isinstance(obj, list): + for i, v in enumerate(obj): + walk(v, f"{path}[{i}]") + elif isinstance(obj, str): + if matches(obj): + cites.append(f"{path}: {obj}") + + walk(pkg, "") + return cites + + def build_bin_to_pkg(head_lock: dict) -> dict[str, str]: """Map a binary name (e.g. 'vite', 'tsc', 'eslint') to the package that provides it. Built from each lockfile entry's `bin` field. @@ -613,6 +692,8 @@ def reachable_install_paths(name: str) -> tuple[str | None, list[str]]: hits = find_usage(name) for cite in script_refs.get(name, []): hits.append(Hit("studio/frontend/package.json", 0, "script_bin", cite)) + for cite in package_json_extra_refs(head_pkg, name): + hits.append(Hit("studio/frontend/package.json", 0, "pkg_json_field", cite)) top, nested = reachable_install_paths(name) importable_top_level = top is not None # Source imports of bare specifier `name` resolve ONLY to top-level diff --git a/tests/studio/test_frontend_dep_removal.py b/tests/studio/test_frontend_dep_removal.py index 256534734a..4fd8f4f4d8 100644 --- a/tests/studio/test_frontend_dep_removal.py +++ b/tests/studio/test_frontend_dep_removal.py @@ -800,6 +800,241 @@ class AdvCase: ] +# --------------------------------------------------------------------------- +# package.json field-reference cases: simulate `prettier: "@x/config"`, +# `eslintConfig.extends`, `overrides`, `peerDependenciesMeta`, etc. +# These test the package_json_extra_refs() coverage. Cross-checked against +# the patterns used by Tailwind, Stylelint, Prettier, Next.js, Astro, +# TypeScript, ESLint, SvelteKit, Storybook, Vite, and TanStack/Query +# manifests. +# --------------------------------------------------------------------------- + + +@dataclass +class PkgFieldCase: + id: str + desc: str + field_patch: dict # extra fields to merge into synth_head package.json + target_pkg: str + expected_status: str + expected_failures: list[str] + + +PKG_FIELD_CASES: list[PkgFieldCase] = [ + PkgFieldCase( + "P01", + "removing pkg referenced only in `prettier` string field", + {"prettier": "__pkg_prettier_config__"}, + "__pkg_prettier_config__", + "FAIL", + ["__pkg_prettier_config__"], + ), + PkgFieldCase( + "P02", + "removing pkg referenced in `eslintConfig.extends` array", + {"eslintConfig": {"extends": ["__pkg_eslint_cfg__"]}}, + "__pkg_eslint_cfg__", + "FAIL", + ["__pkg_eslint_cfg__"], + ), + PkgFieldCase( + "P03", + "removing pkg referenced in `stylelint.plugins`", + {"stylelint": {"plugins": ["__pkg_stylelint_plugin__"]}}, + "__pkg_stylelint_plugin__", + "FAIL", + ["__pkg_stylelint_plugin__"], + ), + PkgFieldCase( + "P04", + "removing pkg referenced in `babel.presets`", + {"babel": {"presets": [["__pkg_babel_preset__", {"opt": 1}]]}}, + "__pkg_babel_preset__", + "FAIL", + ["__pkg_babel_preset__"], + ), + PkgFieldCase( + "P05", + "removing pkg used as a key in `overrides`", + {"overrides": {"__pkg_overridden__": "^1.0.0"}}, + "__pkg_overridden__", + "FAIL", + ["__pkg_overridden__"], + ), + PkgFieldCase( + "P06", + "removing pkg used as a key in `pnpm.overrides`", + {"pnpm": {"overrides": {"__pkg_pnpm_override__": "^1.0.0"}}}, + "__pkg_pnpm_override__", + "FAIL", + ["__pkg_pnpm_override__"], + ), + PkgFieldCase( + "P07", + "removing pkg used as a key in `pnpm.patchedDependencies`", + {"pnpm": {"patchedDependencies": {"__pkg_patched__": "patches/x.patch"}}}, + "__pkg_patched__", + "FAIL", + ["__pkg_patched__"], + ), + PkgFieldCase( + "P08", + "removing pkg used as a key in `peerDependenciesMeta`", + {"peerDependenciesMeta": {"__pkg_peer_meta__": {"optional": True}}}, + "__pkg_peer_meta__", + "FAIL", + ["__pkg_peer_meta__"], + ), + PkgFieldCase( + "P09", + "removing pkg referenced in `jest.preset` string", + {"jest": {"preset": "__pkg_jest_preset__"}}, + "__pkg_jest_preset__", + "FAIL", + ["__pkg_jest_preset__"], + ), + PkgFieldCase( + "P10", + "removing pkg referenced in `commitlint.extends`", + {"commitlint": {"extends": ["__pkg_commitlint__"]}}, + "__pkg_commitlint__", + "FAIL", + ["__pkg_commitlint__"], + ), + PkgFieldCase( + "P11", + "removing pkg referenced in `renovate.extends`", + {"renovate": {"extends": ["__pkg_renovate__"]}}, + "__pkg_renovate__", + "FAIL", + ["__pkg_renovate__"], + ), + PkgFieldCase( + "P12", + "removing pkg referenced in `remarkConfig.plugins`", + {"remarkConfig": {"plugins": ["__pkg_remark__"]}}, + "__pkg_remark__", + "FAIL", + ["__pkg_remark__"], + ), + PkgFieldCase( + "P13", + "removing pkg with subpath ref in tool config (`pkg/config`)", + {"prettier": "__pkg_prettier_sub__/config"}, + "__pkg_prettier_sub__", + "FAIL", + ["__pkg_prettier_sub__"], + ), + PkgFieldCase( + "P14", + "false-positive guard: similar-prefix package in tool config", + {"prettier": "__pkg_short_extra__/config"}, + "__pkg_short__", + "PASS", + [], + ), + PkgFieldCase( + "P15", + "false-positive guard: package-named string in `browserslist` " + "must NOT trigger (browserslist values are browser queries, " + "never package names)", + {"browserslist": ["last 2 versions", "__pkg_browserslist__"]}, + "__pkg_browserslist__", + "PASS", + [], + ), + PkgFieldCase( + "P16", + "false-positive guard: matching string in `keywords` field", + {"keywords": ["__pkg_keyword__", "foo"]}, + "__pkg_keyword__", + "PASS", + [], + ), + PkgFieldCase( + "P17", + "false-positive guard: matching string in `workspaces` (paths)", + {"workspaces": ["packages/__pkg_workspace_path__"]}, + "__pkg_workspace_path__", + "PASS", + [], + ), + PkgFieldCase( + "P18", + "false-positive guard: matching value in `files` field", + {"files": ["dist/__pkg_in_files__"]}, + "__pkg_in_files__", + "PASS", + [], + ), + PkgFieldCase( + "P19", + "false-positive guard: matching `packageManager` string", + {"packageManager": "__pkg_in_pm__@1.0.0"}, + "__pkg_in_pm__", + "PASS", + [], + ), +] + + +def run_pkg_field_cases() -> int: + head_pkg = json.loads(HEAD_PKG.read_text()) + passed = 0 + for pc in PKG_FIELD_CASES: + synth_head = json.loads(json.dumps(head_pkg)) + # Apply the field patch (deep-merge isn't needed; we control the keys). + for k, v in pc.field_patch.items(): + synth_head[k] = v + # Base has the target in dependencies; head does not. The extra field + # in synth_head references the target pkg even though it's no longer + # in deps. + synth_base = json.loads(json.dumps(head_pkg)) + synth_base.setdefault("dependencies", {})[pc.target_pkg] = "^1.0.0" + with tempfile.NamedTemporaryFile("w", suffix = ".json", delete = False) as f: + json.dump(synth_base, f, indent = 2) + base_path = f.name + with tempfile.NamedTemporaryFile("w", suffix = ".json", delete = False) as f: + json.dump(synth_head, f, indent = 2) + head_path = f.name + try: + proc = subprocess.run( + [sys.executable, str(SCRIPT), + "--base-pkg", base_path, + "--head-pkg", head_path, + "--head-lock", str(HEAD_LOCK)], + capture_output = True, text = True, cwd = str(REPO), + ) + finally: + os.unlink(base_path) + os.unlink(head_path) + actual_status = {0: "PASS", 1: "FAIL"}.get(proc.returncode, f"RC{proc.returncode}") + fails: list[str] = [] + in_summary = False + for line in proc.stdout.splitlines(): + if "FAIL:" in line and "removed package" in line: + in_summary = True + continue + if in_summary and line.strip().startswith("- "): + fails.append(line.strip()[2:]) + # The expected_failures includes the tolerated-FP case (P15); we + # accept BOTH expected_status and expected_failures matches. + ok = (actual_status == pc.expected_status + and set(fails) == set(pc.expected_failures)) + mark = "PASS" if ok else "FAIL" + print(f" [{mark}] {pc.id}: {pc.desc}") + if not ok: + print(f" expected: status={pc.expected_status} fails={pc.expected_failures}") + print(f" actual: status={actual_status} fails={fails}") + for ln in proc.stdout.splitlines()[:25]: + print(f" {ln}") + if ok: + passed += 1 + print() + print(f"{passed}/{len(PKG_FIELD_CASES)} package.json-field cases pass") + return 0 if passed == len(PKG_FIELD_CASES) else 1 + + def run_adversarial_cases() -> int: ADVERSARIAL_TMP_DIR.mkdir(parents = True, exist_ok = True) head_pkg = json.loads(HEAD_PKG.read_text()) @@ -903,7 +1138,12 @@ def main() -> int: print() adv_rc = run_adversarial_cases() - if passed == total and cls_rc == 0 and adv_rc == 0: + print() + print(f"Running {len(PKG_FIELD_CASES)} package.json-field cases") + print() + pkg_rc = run_pkg_field_cases() + + if passed == total and cls_rc == 0 and adv_rc == 0 and pkg_rc == 0: return 0 return 1 From e83e325ee307fb813d20806d7b05e5d574fc3f1a Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sat, 16 May 2026 05:29:18 +0000 Subject: [PATCH 07/14] scripts: enumerate dead deps in studio/frontend Adds an opt-in dead-dep enumeration to the existing safety check. Iterates every package declared in studio/frontend/package.json (all four dep fields combined) and reports each as one of: used at least one detected reference -- in src/, a config file, package.json scripts (bin), a package.json tool-config field (overrides / prettier / eslintConfig / stylelint / babel / jest / commitlint / renovate / etc.), or tsconfig.compilerOptions.types unused no detected reference anywhere type_pkg_kept @types/X where X is still declared (or X = node, always implicit) type_pkg_orphan @types/X where X is no longer declared -- candidate for removal alongside X Wiring: - New CLI flag `--enumerate-dead` (off by default). - CI workflow now passes `--enumerate-dead` so the report shows on every PR run; the report is informational unless `--strict` is also set. - With `--strict`, unused / type_pkg_orphan entries fail the run. Tests: - 5 new EnumCase scenarios: E01 fake dep with no usage -> reported unused E02 fake dep imported by a synthetic src file -> reported used E03 fake dep referenced only in overrides -> reported used E04 @types/X paired with X (also imported) -> kept E05 @types/X without X -> orphan Running the new flag against the current main reproduces exactly the 11 deps PR #5477 removed, validating the heuristic end to end. Current total: 29 edge + 36 classify + 12 adversarial + 19 pkg-json field + 5 enumeration = 101 / 101. --- .github/workflows/studio-frontend-ci.yml | 3 +- scripts/check_frontend_dep_removal.py | 142 ++++++++++++++++- tests/studio/test_frontend_dep_removal.py | 183 +++++++++++++++++++++- 3 files changed, 324 insertions(+), 4 deletions(-) diff --git a/.github/workflows/studio-frontend-ci.yml b/.github/workflows/studio-frontend-ci.yml index 546198b8b2..ecbcb12d9a 100644 --- a/.github/workflows/studio-frontend-ci.yml +++ b/.github/workflows/studio-frontend-ci.yml @@ -93,7 +93,8 @@ jobs: working-directory: ${{ github.workspace }} run: | python3 scripts/check_frontend_dep_removal.py \ - --base "origin/${{ github.base_ref }}" + --base "origin/${{ github.base_ref }}" \ + --enumerate-dead python3 tests/studio/test_frontend_dep_removal.py - name: Typecheck diff --git a/scripts/check_frontend_dep_removal.py b/scripts/check_frontend_dep_removal.py index c51e2a872d..2c99acbd30 100644 --- a/scripts/check_frontend_dep_removal.py +++ b/scripts/check_frontend_dep_removal.py @@ -482,6 +482,93 @@ def scripts_bin_refs( return refs +def tsconfig_compiler_types_refs() -> set[str]: + """Read studio/frontend/tsconfig*.json and return the set of + package names referenced in compilerOptions.types arrays. These are + implicitly loaded by tsc and count as a real use even though they + have no explicit import. + """ + out: set[str] = set() + base = REPO_ROOT / "studio/frontend" + for name in ("tsconfig.json", "tsconfig.app.json", "tsconfig.node.json"): + path = base / name + if not path.exists(): + continue + try: + text = path.read_text() + # tsconfig allows comments; strip simple line comments. + text = re.sub(r"//[^\n]*", "", text) + data = json.loads(text) + except (OSError, json.JSONDecodeError): + continue + types = (data.get("compilerOptions", {}) or {}).get("types", []) or [] + for t in types: + if not isinstance(t, str): + continue + # `vite/client` resolves to `vite` package. + pkg = t.split("/", 1)[0] if not t.startswith("@") else "/".join(t.split("/", 2)[:2]) + out.add(pkg) + return out + + +def enumerate_dep_usage(head_pkg: dict, head_lock: dict) -> dict[str, list]: + """For every declared dep, classify whether it appears used. Returns + a dict with these categories: + - used: has at least one detected usage in src/, + config files, scripts.bin, package.json + field refs, or tsconfig types + - unused: no detected usage anywhere + - type_pkg_kept: @types/X where X is still declared + - type_pkg_orphan: @types/X where X is no longer declared + (or X is removed) -- candidate for removal + + Each entry is the package name. The categorisation is opinionated; + `unused` is a CANDIDATE list, not a guarantee. The caller should + verify before deletion. + """ + decl = all_decl_names(head_pkg) + bin_to_pkg = build_bin_to_pkg(head_lock) if head_lock else {} + script_refs = scripts_bin_refs(head_pkg, bin_to_pkg) + tsc_types = tsconfig_compiler_types_refs() + + results: dict[str, list] = { + "used": [], + "unused": [], + "type_pkg_kept": [], + "type_pkg_orphan": [], + } + for name in sorted(decl): + if name.startswith("@types/"): + target = name[len("@types/") :] + if "__" in target: + scope, sub = target.split("__", 1) + target = f"@{scope}/{sub}" + if target == "node": + results["type_pkg_kept"].append(name) + elif target in decl: + results["type_pkg_kept"].append(name) + else: + results["type_pkg_orphan"].append(name) + continue + # Real-source-usage check + hits = find_usage(name) + used = bool(hits) + # Bin scripts + if not used and name in script_refs: + used = True + # package.json non-dep field references + if not used and package_json_extra_refs(head_pkg, name): + used = True + # tsconfig compilerOptions.types implicit usage + if not used and name in tsc_types: + used = True + if used: + results["used"].append(name) + else: + results["unused"].append(name) + return results + + def find_imports_without_decl(head_pkg: dict) -> list[tuple[str, int, str]]: """Reverse check: find bare-specifier imports in studio/frontend/src that don't correspond to any declared package.json dep. Catches the @@ -628,7 +715,13 @@ def main() -> int: "--strict", action = "store_true", help = "Also fail on hygiene warnings (lockfile sync, " - "@types orphans, imports without declared dep).", + "@types orphans, imports without declared dep, unused deps).", + ) + p.add_argument( + "--enumerate-dead", + action = "store_true", + help = "Print every declared dep that appears unused anywhere " + "in the repo. Informational; does not fail unless --strict.", ) args = p.parse_args() @@ -661,6 +754,26 @@ def main() -> int: if not removed: print("[OK] no dependencies removed from studio/frontend/package.json") + if args.enumerate_dead: + print() + enum = enumerate_dep_usage(head_pkg, head_lock) + print("Dead-dep enumeration:") + if enum["unused"]: + print(f" unused ({len(enum['unused'])}):") + for n in enum["unused"]: + print(f" - {n}") + else: + print(" unused: none") + if enum["type_pkg_orphan"]: + print(f" type_pkg_orphan ({len(enum['type_pkg_orphan'])}):") + for n in enum["type_pkg_orphan"]: + print(f" - {n}") + if args.verbose: + print(f" used: {len(enum['used'])}") + print(f" type_pkg_kept: {len(enum['type_pkg_kept'])}") + if args.strict and (enum["unused"] or enum["type_pkg_orphan"]): + print("FAIL (--strict): dead deps present") + return 1 return 0 print( @@ -743,8 +856,33 @@ def reachable_install_paths(name: str) -> tuple[str | None, list[str]]: print(f" - {file}:{ln} imports '{spec}'") print() + enum = None + if args.enumerate_dead: + enum = enumerate_dep_usage(head_pkg, head_lock) + print("Dead-dep enumeration:") + if enum["unused"]: + print(f" unused ({len(enum['unused'])}):") + for n in enum["unused"]: + print(f" - {n}") + else: + print(" unused: none") + if enum["type_pkg_orphan"]: + print(f" type_pkg_orphan ({len(enum['type_pkg_orphan'])}):") + for n in enum["type_pkg_orphan"]: + print(f" - {n}") + if args.verbose: + print(f" used: {len(enum['used'])}") + print(f" type_pkg_kept: {len(enum['type_pkg_kept'])}") + print() + strict_failures = bool(failures) or ( - args.strict and (sync_warns or types_warns or missing_imports) + args.strict + and ( + sync_warns + or types_warns + or missing_imports + or (enum and (enum["unused"] or enum["type_pkg_orphan"])) + ) ) if failures: diff --git a/tests/studio/test_frontend_dep_removal.py b/tests/studio/test_frontend_dep_removal.py index 4fd8f4f4d8..c7d9bb3959 100644 --- a/tests/studio/test_frontend_dep_removal.py +++ b/tests/studio/test_frontend_dep_removal.py @@ -1110,6 +1110,176 @@ def run_adversarial_cases() -> int: return 0 if passed == len(ADV_CASES) else 1 +# --------------------------------------------------------------------------- +# Dead-dep enumeration cases. +# --------------------------------------------------------------------------- + + +@dataclass +class EnumCase: + id: str + desc: str + add_deps: dict[str, str] + add_dev_deps: dict[str, str] + field_patch: dict + extra_file: tuple[str, str] | None # (relative_path, content) or None + expected_unused: set[str] + expected_used: set[str] + expected_orphan_types: set[str] + + +ENUM_CASES: list[EnumCase] = [ + EnumCase( + "E01", + "fake dep with no usage anywhere is flagged unused", + {"__enum_fake_unused_pkg__": "^1.0.0"}, + {}, + {}, + None, + {"__enum_fake_unused_pkg__"}, + set(), + set(), + ), + EnumCase( + "E02", + "fake dep referenced via vite.config-style import is flagged used " + "(uses a real adversarial file as the import site)", + {"__enum_used_via_src__": "^1.0.0"}, + {}, + {}, + ( + "src/__dep_check_adversarial__/enum_e02.ts", + 'import x from "__enum_used_via_src__";\n', + ), + set(), + {"__enum_used_via_src__"}, + set(), + ), + EnumCase( + "E03", + "fake dep referenced only in package.json `overrides` is flagged used", + {"__enum_used_via_overrides__": "^1.0.0"}, + {}, + {"overrides": {"__enum_used_via_overrides__": "^1.0.0"}}, + None, + set(), + {"__enum_used_via_overrides__"}, + set(), + ), + EnumCase( + "E04", + "@types/X where X is declared -> kept (NOT orphan)", + {"__enum_real_pkg__": "^1.0.0"}, + {"@types/__enum_real_pkg__": "^1.0.0"}, + {}, + ( + "src/__dep_check_adversarial__/enum_e04.ts", + 'import x from "__enum_real_pkg__";\n', + ), + set(), + {"__enum_real_pkg__"}, + set(), + ), + EnumCase( + "E05", + "@types/X where X is NOT declared anywhere -> orphan", + {}, + {"@types/__enum_orphan_pkg__": "^1.0.0"}, + {}, + None, + set(), + set(), + {"@types/__enum_orphan_pkg__"}, + ), +] + + +def run_enum_cases() -> int: + head_pkg = json.loads(HEAD_PKG.read_text()) + passed = 0 + ADVERSARIAL_TMP_DIR.mkdir(parents = True, exist_ok = True) + for ec in ENUM_CASES: + synth_head = json.loads(json.dumps(head_pkg)) + synth_head.setdefault("dependencies", {}).update(ec.add_deps) + synth_head.setdefault("devDependencies", {}).update(ec.add_dev_deps) + for k, v in ec.field_patch.items(): + synth_head[k] = v + # Drop any temp source file if needed. + fpath = None + if ec.extra_file: + rel, content = ec.extra_file + fpath = REPO / rel + fpath.parent.mkdir(parents = True, exist_ok = True) + fpath.write_text(content) + with tempfile.NamedTemporaryFile("w", suffix = ".json", delete = False) as f: + json.dump(synth_head, f, indent = 2) + head_path = f.name + try: + proc = subprocess.run( + [sys.executable, str(SCRIPT), + "--base-pkg", str(HEAD_PKG), + "--head-pkg", head_path, + "--head-lock", str(HEAD_LOCK), + "--enumerate-dead"], + capture_output = True, text = True, cwd = str(REPO), + ) + finally: + os.unlink(head_path) + if fpath: + try: + fpath.unlink() + except FileNotFoundError: + pass + # Parse the dead-dep enumeration output. + unused: set[str] = set() + orphans: set[str] = set() + in_unused = False + in_orphan = False + for line in proc.stdout.splitlines(): + s = line.strip() + if s.startswith("unused ("): + in_unused = True + in_orphan = False + continue + if s.startswith("type_pkg_orphan ("): + in_unused = False + in_orphan = True + continue + if s.startswith("used:") or s.startswith("type_pkg_kept:"): + in_unused = in_orphan = False + continue + if s.startswith("- "): + if in_unused: + unused.add(s[2:]) + elif in_orphan: + orphans.add(s[2:]) + unused_ok = ec.expected_unused.issubset(unused) and ( + not ec.expected_used or not (ec.expected_used & unused) + ) + orphan_ok = ec.expected_orphan_types.issubset(orphans) + ok = unused_ok and orphan_ok + mark = "PASS" if ok else "FAIL" + print(f" [{mark}] {ec.id}: {ec.desc}") + if not ok: + print(f" expected unused superset: {sorted(ec.expected_unused)}") + print(f" expected used NOT in unused: {sorted(ec.expected_used)}") + print(f" expected orphans superset: {sorted(ec.expected_orphan_types)}") + print(f" actual unused: {sorted(unused)}") + print(f" actual orphans: {sorted(orphans)}") + for ln in proc.stdout.splitlines()[:30]: + print(f" {ln}") + if ok: + passed += 1 + # Cleanup tmp dir if empty. + try: + ADVERSARIAL_TMP_DIR.rmdir() + except OSError: + pass + print() + print(f"{passed}/{len(ENUM_CASES)} enumeration cases pass") + return 0 if passed == len(ENUM_CASES) else 1 + + def main() -> int: head_pkg = json.loads(HEAD_PKG.read_text()) print(f"Running {len(CASES)} edge cases against {SCRIPT.relative_to(REPO)}") @@ -1143,7 +1313,18 @@ def main() -> int: print() pkg_rc = run_pkg_field_cases() - if passed == total and cls_rc == 0 and adv_rc == 0 and pkg_rc == 0: + print() + print(f"Running {len(ENUM_CASES)} dead-dep enumeration cases") + print() + enum_rc = run_enum_cases() + + if ( + passed == total + and cls_rc == 0 + and adv_rc == 0 + and pkg_rc == 0 + and enum_rc == 0 + ): return 0 return 1 From 2660f408ecca8b8f2134d1b6d9dbd242ffe02cf2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 16 May 2026 05:29:56 +0000 Subject: [PATCH 08/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/check_frontend_dep_removal.py | 59 ++++++++++++++------- tests/studio/test_frontend_dep_removal.py | 63 ++++++++++++++--------- 2 files changed, 81 insertions(+), 41 deletions(-) diff --git a/scripts/check_frontend_dep_removal.py b/scripts/check_frontend_dep_removal.py index 2c99acbd30..0b112fa28b 100644 --- a/scripts/check_frontend_dep_removal.py +++ b/scripts/check_frontend_dep_removal.py @@ -355,23 +355,42 @@ def types_orphan_warnings(head_pkg: dict) -> list[str]: # Top-level fields whose contents are never package references. We walk # everything else recursively. _PKG_JSON_OPAQUE_KEYS = { - "browserslist", # browser queries - "keywords", # free-form strings - "engines", # node/npm version constraints - "engineStrict", # bool - "packageManager", # `pnpm@9.0.0` -- the package manager binary - "volta", # version pins for node/npm/yarn - "files", # paths included in publish - "directories", # paths - "publishConfig", # registry / access config - "config", # generic npm config values - "main", "module", "browser", "types", "typings", "type", "exports", - "imports", "bin", "man", # author-side fields (not consumer refs) - "scripts", # handled separately via scripts_bin_refs() - "repository", "bugs", "homepage", "funding", - "author", "contributors", "maintainers", "license", "licenses", - "name", "version", "description", "private", "sideEffects", - "workspaces", # paths/globs, NOT pkg names + "browserslist", # browser queries + "keywords", # free-form strings + "engines", # node/npm version constraints + "engineStrict", # bool + "packageManager", # `pnpm@9.0.0` -- the package manager binary + "volta", # version pins for node/npm/yarn + "files", # paths included in publish + "directories", # paths + "publishConfig", # registry / access config + "config", # generic npm config values + "main", + "module", + "browser", + "types", + "typings", + "type", + "exports", + "imports", + "bin", + "man", # author-side fields (not consumer refs) + "scripts", # handled separately via scripts_bin_refs() + "repository", + "bugs", + "homepage", + "funding", + "author", + "contributors", + "maintainers", + "license", + "licenses", + "name", + "version", + "description", + "private", + "sideEffects", + "workspaces", # paths/globs, NOT pkg names } @@ -506,7 +525,11 @@ def tsconfig_compiler_types_refs() -> set[str]: if not isinstance(t, str): continue # `vite/client` resolves to `vite` package. - pkg = t.split("/", 1)[0] if not t.startswith("@") else "/".join(t.split("/", 2)[:2]) + pkg = ( + t.split("/", 1)[0] + if not t.startswith("@") + else "/".join(t.split("/", 2)[:2]) + ) out.add(pkg) return out diff --git a/tests/studio/test_frontend_dep_removal.py b/tests/studio/test_frontend_dep_removal.py index c7d9bb3959..50ac76471a 100644 --- a/tests/studio/test_frontend_dep_removal.py +++ b/tests/studio/test_frontend_dep_removal.py @@ -999,16 +999,26 @@ def run_pkg_field_cases() -> int: head_path = f.name try: proc = subprocess.run( - [sys.executable, str(SCRIPT), - "--base-pkg", base_path, - "--head-pkg", head_path, - "--head-lock", str(HEAD_LOCK)], - capture_output = True, text = True, cwd = str(REPO), + [ + sys.executable, + str(SCRIPT), + "--base-pkg", + base_path, + "--head-pkg", + head_path, + "--head-lock", + str(HEAD_LOCK), + ], + capture_output = True, + text = True, + cwd = str(REPO), ) finally: os.unlink(base_path) os.unlink(head_path) - actual_status = {0: "PASS", 1: "FAIL"}.get(proc.returncode, f"RC{proc.returncode}") + actual_status = {0: "PASS", 1: "FAIL"}.get( + proc.returncode, f"RC{proc.returncode}" + ) fails: list[str] = [] in_summary = False for line in proc.stdout.splitlines(): @@ -1019,12 +1029,15 @@ def run_pkg_field_cases() -> int: fails.append(line.strip()[2:]) # The expected_failures includes the tolerated-FP case (P15); we # accept BOTH expected_status and expected_failures matches. - ok = (actual_status == pc.expected_status - and set(fails) == set(pc.expected_failures)) + ok = actual_status == pc.expected_status and set(fails) == set( + pc.expected_failures + ) mark = "PASS" if ok else "FAIL" print(f" [{mark}] {pc.id}: {pc.desc}") if not ok: - print(f" expected: status={pc.expected_status} fails={pc.expected_failures}") + print( + f" expected: status={pc.expected_status} fails={pc.expected_failures}" + ) print(f" actual: status={actual_status} fails={fails}") for ln in proc.stdout.splitlines()[:25]: print(f" {ln}") @@ -1216,12 +1229,20 @@ def run_enum_cases() -> int: head_path = f.name try: proc = subprocess.run( - [sys.executable, str(SCRIPT), - "--base-pkg", str(HEAD_PKG), - "--head-pkg", head_path, - "--head-lock", str(HEAD_LOCK), - "--enumerate-dead"], - capture_output = True, text = True, cwd = str(REPO), + [ + sys.executable, + str(SCRIPT), + "--base-pkg", + str(HEAD_PKG), + "--head-pkg", + head_path, + "--head-lock", + str(HEAD_LOCK), + "--enumerate-dead", + ], + capture_output = True, + text = True, + cwd = str(REPO), ) finally: os.unlink(head_path) @@ -1263,7 +1284,9 @@ def run_enum_cases() -> int: if not ok: print(f" expected unused superset: {sorted(ec.expected_unused)}") print(f" expected used NOT in unused: {sorted(ec.expected_used)}") - print(f" expected orphans superset: {sorted(ec.expected_orphan_types)}") + print( + f" expected orphans superset: {sorted(ec.expected_orphan_types)}" + ) print(f" actual unused: {sorted(unused)}") print(f" actual orphans: {sorted(orphans)}") for ln in proc.stdout.splitlines()[:30]: @@ -1318,13 +1341,7 @@ def main() -> int: print() enum_rc = run_enum_cases() - if ( - passed == total - and cls_rc == 0 - and adv_rc == 0 - and pkg_rc == 0 - and enum_rc == 0 - ): + if passed == total and cls_rc == 0 and adv_rc == 0 and pkg_rc == 0 and enum_rc == 0: return 0 return 1 From 340f16c123839074aba4715939f27abe71c40329 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sat, 16 May 2026 06:10:59 +0000 Subject: [PATCH 09/14] ci: fetch base ref before running dep removal safety check actions/checkout uses fetch-depth: 1 by default, so when the dependency removal check ran `git show origin/main:.../package.json` the ref wasn't available locally and the script exited 2 with "could not read base package.json at origin/main:...". Fetch the single base commit before invoking the check so the git-show lookup resolves. --depth=1 keeps the extra fetch cheap. --- .github/workflows/studio-frontend-ci.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/studio-frontend-ci.yml b/.github/workflows/studio-frontend-ci.yml index ecbcb12d9a..6aece1ff7b 100644 --- a/.github/workflows/studio-frontend-ci.yml +++ b/.github/workflows/studio-frontend-ci.yml @@ -88,10 +88,15 @@ jobs: # still imported somewhere. The script walks the lockfile dep graph # from the new top-level deps and only counts top-level node_modules # paths as valid resolution targets for bare src/ imports. + # + # actions/checkout uses fetch-depth: 1 by default, so the base branch + # is not available locally. Fetch the single base commit we need so + # `git show origin/:...` can resolve the pre-PR package.json. - name: Dependency removal safety check if: github.event_name == 'pull_request' working-directory: ${{ github.workspace }} run: | + git fetch --no-tags --depth=1 origin "${{ github.base_ref }}" python3 scripts/check_frontend_dep_removal.py \ --base "origin/${{ github.base_ref }}" \ --enumerate-dead From bdf6f84844663e1e0eb1bc7c27387a63f4496979 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sat, 16 May 2026 09:24:35 +0000 Subject: [PATCH 10/14] ci: address bot review on PR 5478 Five issues flagged across gemini and codex: * --base-lock argparse arg was defined and advertised in the docstring, but main() always read args.head_lock in both branches -- the flag did nothing. Dropped the dead arg and the misleading docstring line; the lockfile-reachability analysis only needs the head lockfile. * lock_resolvable() was defined but never called. Removed. * read_pkg_file() did not specify an encoding for read_text(). Added encoding="utf-8" for cross-platform stability. * read_pkg_file() returned {} when the path did not exist, so a bad --head-lock value silently bypassed the reachability checks (false PASS for removals that resolve through npm script bins). main() now exits 2 with a clear message when the head lockfile is missing, matching the existing behavior for the head pkg. * studio-frontend-ci.yml pull_request paths filter only matched studio/frontend/** and the workflow file, so PRs that modified the checker script or its test could skip this job. Added both files to the trigger. --- .github/workflows/studio-frontend-ci.yml | 2 ++ scripts/check_frontend_dep_removal.py | 35 ++++++++---------------- 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/.github/workflows/studio-frontend-ci.yml b/.github/workflows/studio-frontend-ci.yml index 6aece1ff7b..15453a5a14 100644 --- a/.github/workflows/studio-frontend-ci.yml +++ b/.github/workflows/studio-frontend-ci.yml @@ -15,6 +15,8 @@ on: pull_request: paths: - 'studio/frontend/**' + - 'scripts/check_frontend_dep_removal.py' + - 'tests/studio/test_frontend_dep_removal.py' - '.github/workflows/studio-frontend-ci.yml' push: branches: [main, pip] diff --git a/scripts/check_frontend_dep_removal.py b/scripts/check_frontend_dep_removal.py index 0b112fa28b..2e1e30ff48 100644 --- a/scripts/check_frontend_dep_removal.py +++ b/scripts/check_frontend_dep_removal.py @@ -13,7 +13,7 @@ python scripts/check_frontend_dep_removal.py python scripts/check_frontend_dep_removal.py --base origin/main python scripts/check_frontend_dep_removal.py --base HEAD~1 - python scripts/check_frontend_dep_removal.py --base-pkg PATH --base-lock PATH + python scripts/check_frontend_dep_removal.py --base-pkg PATH --head-lock PATH Exit codes: 0 every removed dep is safe (no source refs or still resolvable) @@ -134,7 +134,7 @@ def read_pkg_at(base: str, path: str) -> dict: def read_pkg_file(path: Path) -> dict: if not path.exists(): return {} - return json.loads(path.read_text()) + return json.loads(path.read_text(encoding = "utf-8")) def all_decl_names(pkg: dict) -> set[str]: @@ -144,19 +144,6 @@ def all_decl_names(pkg: dict) -> set[str]: return names -def lock_resolvable(lock: dict, name: str) -> list[str]: - """Return lockfile paths that still install `name` (transitive ok). - Naive: any path matching the name. May give false positives on a stale - lockfile. Prefer `reachable_from_head` for sync-aware analysis. - """ - pkgs = lock.get("packages", {}) - hits = [] - for path in pkgs: - if path == f"node_modules/{name}" or path.endswith(f"/node_modules/{name}"): - hits.append(path) - return hits - - def _resolve_install_path(parent_path: str, name: str, pkgs: dict) -> str | None: """Walk up the nested node_modules chain from `parent_path` to find where `name` actually resolves. Mirrors Node module resolution. @@ -720,9 +707,6 @@ def main() -> int: p.add_argument( "--base-pkg", help = "optional override: read base package.json from this path" ) - p.add_argument( - "--base-lock", help = "optional override: read base lockfile from this path" - ) p.add_argument( "--head-pkg", default = str(REPO_ROOT / FRONTEND_PKG), @@ -731,7 +715,8 @@ def main() -> int: p.add_argument( "--head-lock", default = str(REPO_ROOT / FRONTEND_LOCK), - help = "head lockfile path (default: working tree)", + help = "head lockfile path (default: working tree). " + "Reachability analysis runs against this lockfile.", ) p.add_argument("--verbose", action = "store_true") p.add_argument( @@ -766,10 +751,14 @@ def main() -> int: ) return 2 - if args.base_lock: - head_lock = read_pkg_file(Path(args.head_lock)) - else: - head_lock = read_pkg_file(Path(args.head_lock)) + head_lock_path = Path(args.head_lock) + if not head_lock_path.exists(): + print( + f"ERROR: head lockfile not found at {head_lock_path}", + file = sys.stderr, + ) + return 2 + head_lock = read_pkg_file(head_lock_path) base_names = all_decl_names(base_pkg) head_names = all_decl_names(head_pkg) From d0a5d5a5d94e84cae1e3c0fd7db710c09cbc42fc Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sat, 16 May 2026 10:09:28 +0000 Subject: [PATCH 11/14] ci: address 10x reviewer findings on dep removal safety check Eight P1s and three P2s surfaced across 10 codex reviewers; this commit addresses all of them. P1s: 1. Workflow refspec. `git fetch --depth=1 origin ` may only create FETCH_HEAD in shallow PR checkouts; the checker then dies with `fatal: invalid object name 'origin/main'`. Use the explicit refspec `:refs/remotes/origin/` so origin/ is reliably created. 2. `_deps_of()` was counting optional peer dependencies as reachable. npm only installs an optional peer when another package declares the same dep, so for "is this removed package still in the tree" they cannot keep it alive on their own. Skip entries marked `optional: true` in `peerDependenciesMeta`. 3. JS-syntactic classifiers (static_import, side_effect_import, dynamic_import, require, re_export, jsdoc_import, template_literal, tsc_triple_slash, new_url) now gate on file extension. Previously only the final string-literal fallback was gated, so a JS-shaped string inside a Python fixture or a Markdown code fence triggered a false FAIL. Added U37-U40 covering .py / .md / .sh / .yml. 4. HTML `', + None, + ), + ClassifyCase( + "U42", + "HTML with similar-prefix package is NOT a match", + "foo", + "index.html", + '', + None, + ), + ClassifyCase( + "U43", + "HTML ', + "html_script", + ), + # CSS url() unquoted variant -- valid CSS, must classify the same + # as the quoted variant. + ClassifyCase( + "U44", + "CSS url() unquoted bare package path", + "katex", + "src/x.css", + "src: url(katex/dist/fonts/font.woff2);", + "css_url", + ), + ClassifyCase( + "U45", + "CSS url() quoted bare package path still works", + "katex", + "src/x.css", + 'src: url("katex/dist/fonts/font.woff2");', + "css_url", + ), ] From 8d1b21c0edbc119c3d8a00b2d4694964513fa1e4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 16 May 2026 10:09:39 +0000 Subject: [PATCH 12/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/check_frontend_dep_removal.py | 8 ++------ tests/studio/test_frontend_dep_removal.py | 4 ++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/scripts/check_frontend_dep_removal.py b/scripts/check_frontend_dep_removal.py index fcb841f4ee..4cbb07a546 100644 --- a/scripts/check_frontend_dep_removal.py +++ b/scripts/check_frontend_dep_removal.py @@ -64,9 +64,7 @@ TS_LIKE_EXT = re.compile(r"\.(ts|tsx|mts|cts|mdx)$") # Files where a removed package's CLI binary could be invoked (npx, bunx, # yarn dlx, pnpm exec, or a bare `pkg --flag` shell call). -COMMAND_LIKE_EXT = re.compile( - r"(\.(ya?ml|sh|ps1|bat)$|(^|/)Dockerfile[^/]*$)" -) +COMMAND_LIKE_EXT = re.compile(r"(\.(ya?ml|sh|ps1|bat)$|(^|/)Dockerfile[^/]*$)") GREP_INCLUDES = [ "--include=*.ts", @@ -295,9 +293,7 @@ def classify(pkg: str, file: str, content: str) -> str | None: rf"]*src\s*=\s*['\"][^'\"]*/{html_pkg}", content ): return "html_script" - if is_html and re.search( - rf"]*href\s*=\s*['\"][^'\"]*/{html_pkg}", content - ): + if is_html and re.search(rf"]*href\s*=\s*['\"][^'\"]*/{html_pkg}", content): return "html_link" # TypeScript triple-slash if is_ts and re.search( diff --git a/tests/studio/test_frontend_dep_removal.py b/tests/studio/test_frontend_dep_removal.py index 0ed78a3276..5b3218cf41 100644 --- a/tests/studio/test_frontend_dep_removal.py +++ b/tests/studio/test_frontend_dep_removal.py @@ -651,7 +651,7 @@ class ClassifyCase: "JS import snippet inside a Python fixture string is NOT a usage", "next-themes", "tests/studio/something.py", - 'snippet = \'import x from "next-themes";\'', + "snippet = 'import x from \"next-themes\";'", None, ), ClassifyCase( @@ -675,7 +675,7 @@ class ClassifyCase: "JS import inside a YAML workflow is NOT classified as a JS usage", "next-themes", ".github/workflows/x.yml", - 'run: echo \'import x from "next-themes";\'', + "run: echo 'import x from \"next-themes\";'", None, ), # HTML script/link must respect package-name boundaries: a From 6b70e7211b11b48d3b29e8fc4d6539209a18cd38 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sat, 16 May 2026 11:05:58 +0000 Subject: [PATCH 13/14] ci: address 4x Opus reviewer findings on dep removal check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three blockers from the parallel Opus review batch: 1. scripts_bin_refs ignored every script that began with a wrapper. The original "first non-env token wins" heuristic credited cross-env / dotenv / dotenvx / env-cmd as the bin, so a script like `cross-env CI=1 biome check` left @biomejs/biome looking unused. Rewrote into _next_real_bin(), which peels env prefixes, the leading package-manager runner (npx / pnpx / bunx / pnpm exec / yarn dlx), and the known wrapper bins (with --/-flag-arg handling) before returning the real CLI. shlex tokenization preserves quoted env values like `FOO="a b"`. 2. enumerate_dep_usage skipped find_command_usage. The non-enumerate path already credited deps used only from CI / Dockerfile / shell scripts, but `--enumerate-dead` did not, so packages referenced only from a workflow were silently listed as dead. Added the same call (gated against @types/* to avoid the unscoped-tail false positive). 3. classify multi-line window was ±4 lines. Prettier formats long named-import lists one identifier per line, so a 20-import block pushed the `import` keyword out of the window and the dep dropped to the string-literal fallback (or worse, was missed entirely). Widened to ±25 -- still bounded enough to keep false-positives negligible, wide enough for the realistic Prettier ceiling. Tests: added 10 _next_real_bin unit cases + 4 scripts_bin_refs end-to-end cases (W01-W10 + I01-I04) and a 22-identifier multi-line import adversarial case (A13). Full suite: 125/125. --- scripts/check_frontend_dep_removal.py | 137 ++++++++++++--- tests/studio/test_frontend_dep_removal.py | 199 +++++++++++++++++++++- 2 files changed, 315 insertions(+), 21 deletions(-) diff --git a/scripts/check_frontend_dep_removal.py b/scripts/check_frontend_dep_removal.py index 4cbb07a546..c7c21b1deb 100644 --- a/scripts/check_frontend_dep_removal.py +++ b/scripts/check_frontend_dep_removal.py @@ -505,13 +505,104 @@ def build_bin_to_pkg(head_lock: dict) -> dict[str, str]: _SCRIPT_TOKENIZE = re.compile(r"\s*(?:&&|\|\||;|\|(?!\|))\s*") +# Wrappers that delegate to a real CLI in the same shell word list. +# After stripping env prefixes and (optionally) `npx`/`pnpm exec`/`yarn dlx`/ +# `bunx`, if the leading token is one of these we advance past the +# wrapper's own flags and any further env-prefix tokens, then re-check. +# `cross-env` is the common one; `dotenv-cli` / `dotenvx` use `--` as a +# separator. Wrappers that operate on named npm-scripts (concurrently, +# npm-run-all, run-s, run-p, wireit, turbo, nx) intentionally aren't +# here -- they reference script names, not bin names, so the real bin +# is in the *target* script's chunk which we already tokenize. +_SCRIPT_WRAPPERS = {"cross-env", "dotenv", "dotenvx", "env-cmd"} +_ENV_PREFIX_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=") + + +def _next_real_bin(words: list[str], idx: int) -> str | None: + """Walk `words` from `idx`, peeling env-prefix tokens, the leading + package-manager runner (`npx`, `pnpm exec`, etc.), and the known + wrapper bins. Return the next token that looks like the real CLI + binary, or None if the chunk has nothing to look up. + + Recursion depth is bounded by the chunk's word count, so the loop + cannot run away on a pathological wrapper chain. + """ + seen_wrappers: set[str] = set() + while idx < len(words): + # 1. env-prefix run: `FOO=bar BAZ="a b" cmd ...`. shlex has + # already collapsed quoted values into one word, so this + # tokenizer is safe for them. + while idx < len(words) and _ENV_PREFIX_RE.match(words[idx]): + idx += 1 + if idx >= len(words): + return None + + first = words[idx] + # 2. Package-manager runner: `npx args`, `pnpm exec `, + # `yarn dlx `, `bunx `. Strip and continue (so the + # wrapped command goes through the same unwrap loop). + if first in {"npx", "pnpx", "bunx"} and idx + 1 < len(words): + idx += 1 + continue + if ( + first in {"pnpm", "yarn"} + and idx + 2 < len(words) + and words[idx + 1] in {"exec", "dlx"} + ): + idx += 2 + continue + + # 3. Wrapper bin (cross-env, dotenv, etc.). Skip the wrapper's + # own flags and any subsequent env-prefix tokens, then re-loop. + bin_token = first.removeprefix("./node_modules/.bin/").removeprefix( + "node_modules/.bin/" + ) + if bin_token in _SCRIPT_WRAPPERS and bin_token not in seen_wrappers: + seen_wrappers.add(bin_token) + idx += 1 + # cross-env / env-cmd: no flags; just more env-prefix tokens. + # dotenv / dotenvx: skip `-e ` style flags and the + # optional `--` separator before the wrapped command. + while idx < len(words): + tok = words[idx] + if tok.startswith("-") and tok != "--": + idx += 1 + # `-e .env` style: also skip the flag's argument + # when it does not look like another flag. + if ( + idx < len(words) + and not words[idx].startswith("-") + and not _ENV_PREFIX_RE.match(words[idx]) + ): + idx += 1 + continue + if tok == "--": + idx += 1 + break + break + continue + return bin_token + return None + def scripts_bin_refs( head_pkg: dict, bin_to_pkg: dict[str, str] ) -> dict[str, list[str]]: """Return `{package_name: ['scripts.X: cmd', ...]}` listing every package referenced via its bin name in package.json scripts. + + Each script value is split on shell separators (`&&`, `||`, `;`, + `|`). Within each chunk, `_next_real_bin()` unwraps env prefixes, + package-manager runners (`npx` / `pnpm exec` / `yarn dlx` / `bunx`), + and wrapper bins like `cross-env` / `dotenv` so that + `cross-env CI=1 biome check` correctly credits `biome` to its + declaring package. + + Tokenization uses shlex.split so quoted env values + (`FOO="a b" biome`) survive unbroken. """ + import shlex + scripts = head_pkg.get("scripts", {}) or {} refs: dict[str, list[str]] = {} for script_name, raw_cmd in scripts.items(): @@ -521,25 +612,21 @@ def scripts_bin_refs( chunk = chunk.strip() if not chunk: continue - words = chunk.split() - idx = 0 - while idx < len(words) and re.match( - r"^[A-Za-z_][A-Za-z0-9_]*=", words[idx] - ): - idx += 1 - if idx >= len(words): + try: + words = shlex.split(chunk, posix = True) + except ValueError: + # Unbalanced quotes -- fall back to plain split. + words = chunk.split() + if not words: + continue + bin_name = _next_real_bin(words, 0) + if bin_name is None: continue - first = words[idx] - if first in {"npx", "pnpx", "yarn", "pnpm", "bunx"} and idx + 1 < len( - words - ): - idx += 1 - first = words[idx] - first = first.removeprefix("./node_modules/.bin/") - first = first.removeprefix("node_modules/.bin/") - pkg = bin_to_pkg.get(first) + pkg = bin_to_pkg.get(bin_name) if pkg: - refs.setdefault(pkg, []).append(f"scripts.{script_name}: {raw_cmd}") + refs.setdefault(pkg, []).append( + f"scripts.{script_name}: {raw_cmd}" + ) return refs @@ -618,6 +705,13 @@ def enumerate_dep_usage(head_pkg: dict, head_lock: dict) -> dict[str, list]: # Real-source-usage check hits = find_usage(name) used = bool(hits) + # CLI usage in shell / workflow / Dockerfile surfaces. Skip for + # `@types/*` packages because they never expose a CLI binary and + # the unscoped-tail bin name candidate would scan workflow files + # for the bare runtime name (a removed `@types/foo` would look + # for invocations of `foo`). + if not used and not name.startswith("@types/") and find_command_usage(name): + used = True # Bin scripts if not used and name in script_refs: used = True @@ -752,10 +846,13 @@ def find_usage(pkg: str) -> list[Hit]: # Try the single-line classify first. kind = classify(pkg, file, content) if not kind: - # Multi-line window: 4 lines above + the line itself + 4 below. + # Multi-line window: a generous 25 lines above + the line + + # 25 below so Prettier's one-import-per-line formatting for + # 12-20+ named imports still includes the `import` keyword + # in the same window as the `from "pkg"` clause. lines = _read_file(file) - lo = max(0, lineno - 5) - hi = min(len(lines), lineno + 4) + lo = max(0, lineno - 26) + hi = min(len(lines), lineno + 25) window = "\n".join(lines[lo:hi]) kind = classify(pkg, file, window) if kind: diff --git a/tests/studio/test_frontend_dep_removal.py b/tests/studio/test_frontend_dep_removal.py index 5b3218cf41..a8e4cda8b7 100644 --- a/tests/studio/test_frontend_dep_removal.py +++ b/tests/studio/test_frontend_dep_removal.py @@ -335,6 +335,8 @@ def run_case(case: Case, head_pkg: dict) -> tuple[bool, str]: sys.modules["_dep_check"] = _dep_check # required so @dataclass can resolve annotations _spec.loader.exec_module(_dep_check) classify = _dep_check.classify +_next_real_bin = _dep_check._next_real_bin +scripts_bin_refs = _dep_check.scripts_bin_refs @dataclass @@ -878,6 +880,24 @@ class AdvCase: "FAIL", ["__adv_only_pkg_l__"], ), + # Prettier formats a long named-import list one identifier per line. + # 22 imports + braces puts the `import` keyword ~22 lines away from + # the `from "pkg"` clause. Before the window widening, the classify + # multi-line fallback used ±4 lines, which silently missed every + # such block. This case fails with the old window and passes once + # the window is wide enough (currently ±25). + AdvCase( + "A13", + "Prettier-style 22-identifier multi-line import should FAIL " + "(exercises the widened multi-line classify window)", + "adv13.ts", + "import {\n" + + "".join(f" ident_{i:02d},\n" for i in range(22)) + + '} from "__adv_only_pkg_m__";\n', + "__adv_only_pkg_m__", + "FAIL", + ["__adv_only_pkg_m__"], + ), ] @@ -1384,6 +1404,168 @@ def run_enum_cases() -> int: return 0 if passed == len(ENUM_CASES) else 1 +# --------------------------------------------------------------------------- +# Script-wrapper cases: exercise scripts_bin_refs / _next_real_bin so a +# package.json script like `cross-env CI=1 biome check` correctly credits +# `@biomejs/biome` rather than the wrapper itself. The 10x reviewer flagged +# the original "first non-env token" heuristic as too narrow: any project +# using cross-env / dotenv / dotenvx / env-cmd / a quoted env value would +# bypass the bin-name check. +# --------------------------------------------------------------------------- + + +@dataclass +class WrapperCase: + id: str + desc: str + raw_cmd: str + expected_bin: str | None # None means "no real bin (e.g. unwrappable)" + + +WRAPPER_CASES: list[WrapperCase] = [ + WrapperCase( + "W01", + "cross-env wraps the real bin", + "cross-env CI=1 biome check .", + "biome", + ), + WrapperCase( + "W02", + "cross-env with multiple env tokens after the wrapper", + "cross-env A=1 B=2 NODE_ENV=prod biome check", + "biome", + ), + WrapperCase( + "W03", + "bare env-prefix run (no wrapper) still peels the env tokens", + "FOO=bar biome check", + "biome", + ), + WrapperCase( + "W04", + "quoted env value with spaces (shlex preserves it as one word)", + 'FOO="a b" biome check', + "biome", + ), + WrapperCase( + "W05", + "npx + cross-env: runner peels, wrapper peels, real bin wins", + "npx cross-env CI=1 biome check", + "biome", + ), + WrapperCase( + "W06", + "pnpm exec + cross-env", + "pnpm exec cross-env CI=1 biome check", + "biome", + ), + WrapperCase( + "W07", + "dotenv with the `--` separator before the wrapped command", + "dotenv -- biome check", + "biome", + ), + WrapperCase( + "W08", + "dotenv with a flag-arg pair and `--` separator", + "dotenv -e .env -- biome check", + "biome", + ), + WrapperCase( + "W09", + "leading `./node_modules/.bin/` prefix is stripped", + "./node_modules/.bin/biome check", + "biome", + ), + WrapperCase( + "W10", + "concurrently is NOT a script wrapper -- it dispatches by " + "script *name*, not bin, so the real bin is `concurrently` " + "itself (the wrapped script names are credited by their own " + "scripts entries, which scripts_bin_refs iterates separately)", + 'concurrently "npm:dev" "npm:typecheck"', + "concurrently", + ), +] + + +def run_wrapper_cases() -> int: + import shlex + + passed = 0 + for wc in WRAPPER_CASES: + try: + words = shlex.split(wc.raw_cmd, posix = True) + except ValueError: + words = wc.raw_cmd.split() + actual = _next_real_bin(words, 0) + ok = actual == wc.expected_bin + mark = "PASS" if ok else "FAIL" + print(f" [{mark}] {wc.id}: {wc.desc}") + if not ok: + print(f" raw_cmd={wc.raw_cmd!r}") + print(f" expected={wc.expected_bin!r}, actual={actual!r}") + if ok: + passed += 1 + + # End-to-end integration: feed scripts_bin_refs a synthetic head_pkg + # whose scripts use a wrapper, and confirm the package owning the + # wrapped bin is credited (rather than the wrapper). This is the + # actual call path used by find_command_usage(). + int_total = 0 + int_passed = 0 + int_cases = [ + ( + "I01", + "cross-env wrapping `biome` credits @biomejs/biome", + {"lint": "cross-env CI=1 biome check"}, + {"biome": "@biomejs/biome"}, + "@biomejs/biome", + ), + ( + "I02", + "dotenv -- biome credits @biomejs/biome", + {"lint": "dotenv -- biome check"}, + {"biome": "@biomejs/biome"}, + "@biomejs/biome", + ), + ( + "I03", + "quoted env value before bin still credits the bin's owner", + {"lint": 'FOO="a b" biome check .'}, + {"biome": "@biomejs/biome"}, + "@biomejs/biome", + ), + ( + "I04", + "&& chain: both halves credit their owning packages", + {"build": "tsc -b && cross-env CI=1 biome check"}, + {"tsc": "typescript", "biome": "@biomejs/biome"}, + None, # checked via owning_pkgs below + ), + ] + for case_id, desc, scripts, bin_to_pkg, expect_owner in int_cases: + int_total += 1 + refs = scripts_bin_refs({"scripts": scripts}, bin_to_pkg) + if case_id == "I04": + owners = set(refs.keys()) + ok = owners == {"typescript", "@biomejs/biome"} + else: + ok = expect_owner in refs + mark = "PASS" if ok else "FAIL" + print(f" [{mark}] {case_id}: {desc}") + if not ok: + print(f" scripts={scripts!r} bin_to_pkg={bin_to_pkg!r}") + print(f" refs={refs!r}") + if ok: + int_passed += 1 + + total = len(WRAPPER_CASES) + int_total + print() + print(f"{passed + int_passed}/{total} wrapper-script cases pass") + return 0 if (passed == len(WRAPPER_CASES) and int_passed == int_total) else 1 + + def main() -> int: head_pkg = json.loads(HEAD_PKG.read_text()) print(f"Running {len(CASES)} edge cases against {SCRIPT.relative_to(REPO)}") @@ -1422,7 +1604,22 @@ def main() -> int: print() enum_rc = run_enum_cases() - if passed == total and cls_rc == 0 and adv_rc == 0 and pkg_rc == 0 and enum_rc == 0: + print() + print( + f"Running {len(WRAPPER_CASES)} script-wrapper cases " + "(_next_real_bin + scripts_bin_refs end-to-end)" + ) + print() + wrap_rc = run_wrapper_cases() + + if ( + passed == total + and cls_rc == 0 + and adv_rc == 0 + and pkg_rc == 0 + and enum_rc == 0 + and wrap_rc == 0 + ): return 0 return 1 From 418b63b7cd3c1443baa284b421d17aa94646e5f0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 16 May 2026 11:06:13 +0000 Subject: [PATCH 14/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/check_frontend_dep_removal.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/check_frontend_dep_removal.py b/scripts/check_frontend_dep_removal.py index c7c21b1deb..260ad5215a 100644 --- a/scripts/check_frontend_dep_removal.py +++ b/scripts/check_frontend_dep_removal.py @@ -624,9 +624,7 @@ def scripts_bin_refs( continue pkg = bin_to_pkg.get(bin_name) if pkg: - refs.setdefault(pkg, []).append( - f"scripts.{script_name}: {raw_cmd}" - ) + refs.setdefault(pkg, []).append(f"scripts.{script_name}: {raw_cmd}") return refs