diff --git a/.claude/rules/testing.md b/.claude/rules/testing.md index 325766ba7..6363ddf16 100644 --- a/.claude/rules/testing.md +++ b/.claude/rules/testing.md @@ -16,3 +16,4 @@ paths: - **Never `Math.random()`** in stats tests — use seeded PRNG helpers. - **E2E selectors: `data-testid`** only — text/role/class change with i18n + theme. - Known flaky: `packages/hooks/src/__tests__/index.test.ts` under concurrent Turbo load — passes in isolation; retry once before treating as failure. +- **Architecture tests** (structural-absence guards): see `docs/05-technical/implementation/testing.md` "Architecture Tests (Structural-Absence Guards)" — read-once + per-name regex pattern, denylist limits, branded-type follow-up. diff --git a/docs/05-technical/implementation/testing.md b/docs/05-technical/implementation/testing.md index 7a6c498fa..8e4822f5e 100644 --- a/docs/05-technical/implementation/testing.md +++ b/docs/05-technical/implementation/testing.md @@ -952,4 +952,140 @@ Expected behavior: Reference implementation: `packages/core/src/stats/__tests__/safeMath.test.ts` +--- + +## Architecture Tests (Structural-Absence Guards) + +An architecture test enforces an architectural invariant by asserting that certain function names, identifiers, or import paths do **not** appear in production code. The test file lives alongside ordinary unit tests; CI catches violations at merge time. The rule is enforced at-write time, not at design-doc-review time. + +This is a tripwire, not a wall. The pattern is cheap to add when an ADR needs enforcement but type-level investment isn't yet justified. + +### When to use it + +- An ADR says "the engine MUST NOT expose function X" or "package A MUST NOT import package B." +- The cost of enforcement is high enough that a doc-only ADR gets violated in practice. LLM-assisted development is a particular driver — language models reach for plausible function names (`aggregateCpk`, `rollupCapability`) that the ADR explicitly forbids. +- Type-level enforcement isn't justified yet: the rule isn't ubiquitous enough, or the engineering investment for branded types is too high for this moment. + +### When NOT to use it + +- **The rule is about semantic correctness, not naming.** A grep for `aggregateCpk` doesn't catch `unifiedQualityIndex()` doing the same forbidden math under a different name. +- **The rule deserves type-level enforcement and you have the budget.** Prefer branded types (like `ProcessHubId`) when the engineering cost is justified. The denylist is the interim measure, not the destination. +- **The rule needs cross-package enforcement.** A per-package vitest guard has narrow scope. Cross-package rules require either coordinating multiple test files or writing a repo-wide bash script (see `scripts/check-level-boundaries.sh` below). + +### Implementation pattern + +The canonical example is `packages/core/src/__tests__/architecture.noCrossInvestigationAggregation.test.ts` (ADR-073). The critical discipline: **read source files once, scan many times**. + +```typescript +import { describe, it, expect, beforeAll } from 'vitest'; +import * as fs from 'node:fs'; +import * as path from 'node:path'; + +const FORBIDDEN_NAMES = [ + 'aggregateCpk', + 'aggregateCapability', + 'meanCapability', + 'sumCpk', + 'portfolioCpk', + // ...16 names total +]; + +const CORE_SRC = path.resolve(import.meta.dirname, '..'); + +function listTypeScriptFiles(dir: string): string[] { + const entries = fs.readdirSync(dir, { withFileTypes: true }); + const out: string[] = []; + for (const entry of entries) { + const full = path.join(dir, entry.name); + if (entry.isDirectory()) { + if (entry.name === '__tests__') continue; // skip test dirs + out.push(...listTypeScriptFiles(full)); + continue; + } + if (!entry.isFile()) continue; + if (!entry.name.endsWith('.ts') && !entry.name.endsWith('.tsx')) continue; + if (entry.name.endsWith('.test.ts') || entry.name.endsWith('.test.tsx')) continue; + out.push(full); + } + return out; +} + +// Read-once cache: ~310 files read into memory in beforeAll. +// Each it() block scans cached strings, not disk. +// Avoids ~5000 sync readFileSync calls (16 names × ~310 files) that +// cause IO-induced timeout flakes under turbo concurrent load. +type CachedFile = { path: string; text: string }; + +describe('Architecture — no cross-investigation Cp/Cpk aggregation primitive', () => { + let cache: CachedFile[]; + + beforeAll(() => { + const paths = listTypeScriptFiles(CORE_SRC); + cache = paths.map(p => ({ path: p, text: fs.readFileSync(p, 'utf8') })); + }); + + for (const name of FORBIDDEN_NAMES) { + it(`does not declare or reference "${name}" in @variscout/core (excluding tests)`, () => { + // Whole-word regex: avoids false positives on longer names that + // contain a forbidden substring. + const pattern = new RegExp(`(^|\\W)${name}(?=\\W|$)`); + const hits = cache.filter(f => pattern.test(f.text)).map(f => f.path); + expect(hits, `"${name}" appears in:\n ${hits.join('\n ')}`).toEqual([]); + }); + } +}); +``` + +Key decisions in this pattern: + +- **List source files once** via `fs.readdirSync` with type-filter (`.ts` / `.tsx`, skip tests, skip `__tests__/` directories). +- **Read each file once** into a `CachedFile[]` in `beforeAll`. Do not re-read inside `it()` blocks. +- **One `it()` per forbidden name.** Keeps failures readable: a violation reports exactly which name leaked and in which files. +- **Whole-word regex** (`/(^|\W)NAME(?=\W|$)/`) reduces false positives from longer names that happen to contain a forbidden substring. + +**Anti-pattern:** Re-reading files inside each `it()` block. With 16 forbidden names and ~310 source files, that is ~5000 synchronous `readFileSync` calls per test run. Under turbo's concurrent worker load this exceeds the 5 s per-test timeout intermittently. The read-once + per-name regex pattern (commit `06d2638a`) eliminates the flake; no retry workaround is needed. + +### Bash variant (cross-package, pre-commit) + +For cross-package boundary rules, a bash script is often simpler than coordinating multiple vitest files. Example: `scripts/check-level-boundaries.sh` (ADR-074). It uses `grep -rEq` against specific component directories and runs in the pre-commit hook: + +```bash +check() { + local pattern="$1" + local target="$2" + local message="$3" + if [ -d "$target" ]; then + if grep -rEq "$pattern" "$target" 2>/dev/null; then + echo " ✗ $message" >&2 + FAILED=$((FAILED + 1)) + fi + fi +} + +check "outcomeStats|outcomeBoxplot|outcomeIChart" \ + "packages/ui/src/components/InvestigationWall" \ + "Investigation Wall does not reimplement L1 chart rendering" +``` + +The bash approach has the same semantic limits as the vitest approach (substring pattern, not AST), but covers multiple packages in one pass. + +### Explicit limits of this pattern + +- **Denylist, not allowlist.** Anyone can rename around the forbidden list. The guard catches the obvious case and forces intentional naming; it does not prevent a creative renaming that avoids every listed identifier. +- **Substring grep, not AST.** The whole-word regex reduces false positives, but cannot understand semantics. A function named `unifiedQualityIndex()` doing forbidden cross-investigation aggregation would pass cleanly. +- **Narrow scope.** The vitest guard scans only the package it lives in. Cross-package aggregation introduced in `packages/charts`, `packages/ui`, or apps is invisible to it. The bash script addresses this for the ADR-074 boundaries specifically. +- **Maintenance burden.** Every synonym is one more line. The list can never be complete; reality has more names than you will think of at design time. +- **Comments match too.** A code comment `// don't use aggregateCpk` would trigger the pattern. In practice this is harmless (the comment is an acknowledgment, not an introduction), but it is worth knowing. + +### Existing instances in this codebase + +| File | Mechanism | Enforces | Runs in | +| :--------------------------------------------------------------------------------- | :----------------- | :---------------------------------------------------------------------------------------------------------------- | :-------------- | +| `packages/core/src/__tests__/architecture.noCrossInvestigationAggregation.test.ts` | vitest | ADR-073 (Watson's locality rule — no cross-investigation Cp/Cpk aggregation); 16 forbidden names | `pnpm test` | +| `scripts/check-level-boundaries.sh` | bash + `grep -rEq` | ADR-074 (multi-level boundary policy — SCOUT / Evidence-Map / Hub-Capability layer separation); 5 boundary checks | pre-commit hook | + +### The durable answer + +Architecture tests are interim enforcement. When a rule earns full enforcement — because violations are expensive and the rule is stable — prefer type-level primitives: branded types, policy types, or package-boundary ESLint rules. `ProcessHubId` is the existing example of this upgrade path. See `docs/investigations.md` "Branded Cpk type as durable replacement for forbidden-name guard" for the proposed follow-up targeting the ADR-073 guard specifically. + - [Technical Overview](../index.md) - Technical section index diff --git a/docs/investigations.md b/docs/investigations.md index cf914459a..b35712465 100644 --- a/docs/investigations.md +++ b/docs/investigations.md @@ -176,6 +176,65 @@ that supply vite globals. Runtime behaviour is unaffected. --- +### Branded Cpk type as durable replacement for forbidden-name guard + +**Surfaced by:** post-#168 architecture-test refactor workstream (branch `post-168-architecture-test-refactor`), 2026-05-14. Related to T2 refactor commit `06d2638a`. + +**Description:** The architecture test at +`packages/core/src/__tests__/architecture.noCrossInvestigationAggregation.test.ts` (enforcing +ADR-073, "no cross-investigation Cp/Cpk aggregation") is a tripwire: a denylist substring grep for +16 forbidden function names (`aggregateCpk`, `aggregateCapability`, `rollupCpk`, etc.). It catches +the obvious case where a contributor reaches for `aggregateCpk`, but it has real limits: + +- A creative renaming (`unifiedQualityIndex()`, `combinedProcessMetric()`) passes cleanly — the rule + degrades to a naming convention, not an architectural boundary. +- It is a substring grep, not AST analysis; semantics are invisible to it. +- Scope is narrow: the vitest guard scans only `@variscout/core`. Cross-investigation aggregation + introduced in `packages/charts`, `packages/ui`, or apps is not caught. +- The denylist can never be complete; reality has more names than any human (or language model) will + enumerate at design time. + +The same architectural rule could be enforced TYPE-LEVEL by making `Cpk` an opaque branded type — +analogous to `ProcessHubId` in `packages/core/src/processHub.ts` (introduced in PR #168). That type +is defined as `type ProcessHubId = string & { readonly __brand: 'ProcessHubId' }` with a single +typed constructor `asProcessHubId()` that throws on empty input. With the same pattern applied to +`Cpk`, multi-spec arithmetic becomes a compile error, not a runtime denylist match. + +**Possible directions:** + +- **Branded `Cpk` type in `@variscout/core`.** Define `Cpk` as an opaque type (`type Cpk = number & +{ readonly __brand: 'Cpk' }`) whose only constructor takes a single-`SpecRule` context — i.e., one + spec, one investigation, one step. Forbid helpers that return `Cpk[]` from mixed-investigation + inputs; allow only `Map` or `Map` where the map keys preserve + the locality dimension and prevent arithmetic across keys. Apply the same pattern to `Cp`, `Pp`, + and `Ppk` if they share the ADR-073 constraint. + +- **Migrate consumers.** All ~30+ call sites that read or produce `Cpk` values become typed. The + display layer already goes through `formatStatistic()` (in `@variscout/core/i18n`) — that boundary + is already clean. The engine layer is the migration target: replace bare `number` returns with + typed-constructor calls. + +- **Delete the architecture-grep test** once the type-level enforcement is in place. The substring + guard becomes redundant when the type system prevents the violation. Update ADR-073 with an + amendment note: "enforced by branded `Cpk` type; the historical forbidden-name guard at + `architecture.noCrossInvestigationAggregation.test.ts` is removed." + +**Why it matters:** LLM-assisted development is especially good at "obvious" naming — and especially +good at picking novel-but-semantically-equivalent names when the obvious ones are blocked. A denylist +that a language model can route around in one creative step is a thin safety layer. Type-level +enforcement removes the routing option entirely. + +**Estimated scope:** Real engineering effort, not hygiene. Probably 4–8 tasks across: +`@variscout/core` (type definition + typed constructor + engine-layer consumer migration), +`@variscout/ui` + apps (display-layer migration via the existing `formatStatistic` pathway), +and tests (fixture + assertion updates). Not a single-PR cleanup. + +**Promotion path:** When the engineering budget appears — likely as part of a broader stats-engine +type-cleanup pass — this becomes an ADR-073 amendment + a small design spec + a multi-PR migration. +Until then: stays as a logged investigation. The current tripwire remains the enforcement mechanism. + +--- + ### Stats-bar "Set specs →" link reads project-wide specs only **Surfaced by:** FRAME b0 spec wiring fixes, 2026-05-03 (branch `feature/full-vision-frame-b0`). diff --git a/packages/core/src/__tests__/architecture.noCrossInvestigationAggregation.test.ts b/packages/core/src/__tests__/architecture.noCrossInvestigationAggregation.test.ts index aaa55f58d..9418d85e3 100644 --- a/packages/core/src/__tests__/architecture.noCrossInvestigationAggregation.test.ts +++ b/packages/core/src/__tests__/architecture.noCrossInvestigationAggregation.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect } from 'vitest'; +import { describe, it, expect, beforeAll } from 'vitest'; import * as fs from 'node:fs'; import * as path from 'node:path'; @@ -19,6 +19,22 @@ import * as path from 'node:path'; * which the spec permits. Forbidding it would collide with legitimate * within-investigation aggregation. The cross-investigation hazard the * spec targets is captured by the explicit cross/portfolio/global variants. + * + * ## This is a tripwire, not a wall. + * + * Architecture tests catch the obvious case (a contributor — human or + * LLM-assisted — reaches for `aggregateCpk` or `sumCpk`) and force + * intentional renaming OR a design rethink. They do NOT enforce + * semantic correctness: a function named `unifiedQualityIndex()` doing + * exactly the forbidden aggregation would pass this test. And the scope + * is narrow — this guard scans only `@variscout/core`; cross-package + * aggregations introduced in `packages/charts`, `packages/ui`, or apps + * would not be caught. + * + * The durable answer for any rule that earns full enforcement is + * type-level (branded types, ADR-074-style policy types). See + * `docs/investigations.md` "Branded Cpk type as durable replacement" + * for the proposed follow-up. */ const FORBIDDEN_NAMES = [ // Spec's own verification command names @@ -71,25 +87,29 @@ function listTypeScriptFiles(dir: string): string[] { return out; } -function findHits(name: string, files: readonly string[]): string[] { - // Whole-word match: name preceded by non-word char (or start of file) and - // followed by non-word char (or end of file). Avoids false positives on - // longer names that happen to contain a forbidden substring. - const pattern = new RegExp(`(^|\\W)${name}(?=\\W|$)`); - const hits: string[] = []; - for (const file of files) { - const text = fs.readFileSync(file, 'utf8'); - if (pattern.test(text)) hits.push(file); - } - return hits; -} +// Read-once cache: all ~310 source files are read into memory once in +// beforeAll. Each it() block scans the cached strings, not the disk. +// This reduces ~5000 synchronous readFileSync calls (16 names × ~310 files) +// to ~310 reads total, eliminating the IO-induced timeout flake under +// turbo concurrent load (see docs/investigations.md +// "Branded Cpk type as durable replacement" for the durable reference). +type CachedFile = { path: string; text: string }; describe('Architecture — no cross-investigation Cp/Cpk aggregation primitive', () => { - const files = listTypeScriptFiles(CORE_SRC); + let cache: CachedFile[]; + + beforeAll(() => { + const paths = listTypeScriptFiles(CORE_SRC); + cache = paths.map(p => ({ path: p, text: fs.readFileSync(p, 'utf8') })); + }); for (const name of FORBIDDEN_NAMES) { it(`does not declare or reference "${name}" anywhere in @variscout/core (excluding tests)`, () => { - const hits = findHits(name, files); + // Whole-word match: name preceded by non-word char (or start of file) + // and followed by non-word char (or end of file). Avoids false positives + // on longer names that happen to contain a forbidden substring. + const pattern = new RegExp(`(^|\\W)${name}(?=\\W|$)`); + const hits = cache.filter(f => pattern.test(f.text)).map(f => f.path); expect(hits, `Forbidden name "${name}" appears in:\n ${hits.join('\n ')}`).toEqual([]); }); }