diff --git a/src/core/engine/scoring.test.ts b/src/core/engine/scoring.test.ts index 6a6b8087..d019f4ce 100644 --- a/src/core/engine/scoring.test.ts +++ b/src/core/engine/scoring.test.ts @@ -86,9 +86,9 @@ describe("calculateScores", () => { expect(scores.summary.totalIssues).toBe(4); }); - it("uses calculatedScore for density: higher score = more density impact", () => { + it("uses base rule score with sqrt damping for density (#226)", () => { const heavyIssue = makeIssue({ ruleId: "no-auto-layout", category: "pixel-critical", severity: "blocking", score: -10 }); - heavyIssue.calculatedScore = -15; // Simulate depthWeight effect + heavyIssue.calculatedScore = -15; // calculatedScore ignored for density — uses base score (-10), so |−10| × sqrt(1) = 10 const lightIssue = makeIssue({ ruleId: "non-semantic-name", category: "semantic", severity: "suggestion", score: -1 }); lightIssue.calculatedScore = -1; @@ -96,16 +96,14 @@ describe("calculateScores", () => { const heavy = calculateScores(makeResult([heavyIssue], 100)); const light = calculateScores(makeResult([lightIssue], 100)); - expect(heavy.byCategory["pixel-critical"].weightedIssueCount).toBe(15); + // sqrt(1) = 1, so 1 issue = base score directly + expect(heavy.byCategory["pixel-critical"].weightedIssueCount).toBe(10); expect(light.byCategory["semantic"].weightedIssueCount).toBe(1); }); - it("differentiates rules within the same severity by score", () => { + it("differentiates rules within the same severity by base score", () => { const highScoreIssue = makeIssue({ ruleId: "no-auto-layout", category: "pixel-critical", severity: "blocking", score: -10 }); - highScoreIssue.calculatedScore = -15; - const lowScoreIssue = makeIssue({ ruleId: "absolute-position-in-auto-layout", category: "pixel-critical", severity: "blocking", score: -3 }); - lowScoreIssue.calculatedScore = -5; const highScore = calculateScores(makeResult([highScoreIssue], 100)); const lowScore = calculateScores(makeResult([lowScoreIssue], 100)); @@ 
-113,8 +111,9 @@ describe("calculateScores", () => { expect(highScore.byCategory["pixel-critical"].densityScore).toBeLessThan( lowScore.byCategory["pixel-critical"].densityScore ); - expect(highScore.byCategory["pixel-critical"].weightedIssueCount).toBe(15); - expect(lowScore.byCategory["pixel-critical"].weightedIssueCount).toBe(5); + // sqrt(1) = 1, so single issue = base score + expect(highScore.byCategory["pixel-critical"].weightedIssueCount).toBe(10); + expect(lowScore.byCategory["pixel-critical"].weightedIssueCount).toBe(3); }); it("density score decreases as weighted issue count increases relative to node count", () => { @@ -133,6 +132,18 @@ describe("calculateScores", () => { expect(many.byCategory["pixel-critical"].densityScore).toBeLessThan( few.byCategory["pixel-critical"].densityScore ); + // Verify sqrt damping: 5 issues of score -5 → 5 × sqrt(5) ≈ 11.18 + expect(many.byCategory["pixel-critical"].weightedIssueCount).toBeCloseTo(5 * Math.sqrt(5), 1); + }); + + it("applies sqrt damping independently per rule", () => { + const issues = [ + ...Array.from({ length: 4 }, () => makeIssue({ ruleId: "no-auto-layout", category: "pixel-critical", severity: "blocking", score: -5 })), + ...Array.from({ length: 9 }, () => makeIssue({ ruleId: "non-layout-container", category: "pixel-critical", severity: "risk", score: -3 })), + ]; + const scores = calculateScores(makeResult(issues, 100)); + // 5×sqrt(4) + 3×sqrt(9) = 10 + 9 = 19 + expect(scores.byCategory["pixel-critical"].weightedIssueCount).toBeCloseTo(19, 1); }); it("diversity score penalizes more unique rules being triggered", () => { diff --git a/src/core/engine/scoring.ts b/src/core/engine/scoring.ts index 42c9d335..06a3e827 100644 --- a/src/core/engine/scoring.ts +++ b/src/core/engine/scoring.ts @@ -49,16 +49,17 @@ export interface ScoreReport { export type Grade = "S" | "A+" | "A" | "B+" | "B" | "C+" | "C" | "D" | "F"; /** - * Density weighting now uses per-rule `calculatedScore` from the rule engine, - * 
which incorporates both the calibrated rule score and depthWeight. + * Density weighting uses per-rule base |score| with sqrt damping (#226). * - * Previously, flat severity weights (blocking=3.0, risk=2.0, etc.) were used, - * making all rules within the same severity contribute equally and rendering - * the per-rule scores in rule-config.ts effectively unused. + * Previously, each issue's |calculatedScore| was summed linearly — a rule + * triggering N times contributed N× its score. This over-penalized designs + * with many instances of the same issue (e.g., raw-value ×79 = 316 weight), + * causing most real files to score Grade D/F with no differentiation. * - * Now: `no-auto-layout` (score: -10, depthWeight: 1.5) at root contributes 15 - * to density, while `non-semantic-name` (score: -4, no depthWeight) contributes 4. - * This makes calibration loop score adjustments flow through to user-facing scores. + * Now: same rule triggered N times contributes |score| × sqrt(N). + * raw-value ×79 = 4 × sqrt(79) ≈ 36 instead of 316. + * At low counts the damping is mild (sqrt(1)=1, sqrt(4)=2), preserving sensitivity. + * At high counts the curve flattens — the 79th occurrence adds ~0.2 vs the 1st adding 4. * * Category weights removed (#196) — overall score is simple average of categories. * Category importance is already encoded in rule scores (pixel-critical -10 @@ -170,9 +171,12 @@ export function calculateScores( // Track unique rules and their base |score| per category const uniqueRulesPerCategory = new Map>(); const ruleScorePerCategory = new Map>(); + // Track issue counts per rule per category for sqrt damping (#226) + const ruleIssueCountPerCategory = new Map>(); for (const category of CATEGORIES) { uniqueRulesPerCategory.set(category, new Set()); ruleScorePerCategory.set(category, new Map()); + ruleIssueCountPerCategory.set(category, new Map()); } // Compute totals from the config map. 
@@ -189,9 +193,27 @@ export function calculateScores( categoryScores[category].issueCount++; categoryScores[category].bySeverity[severity]++; - categoryScores[category].weightedIssueCount += Math.abs(issue.calculatedScore); uniqueRulesPerCategory.get(category)!.add(ruleId); ruleScorePerCategory.get(category)!.set(ruleId, Math.abs(issue.config.score)); + // Count occurrences per rule (each issue adds 1); the rule's base |score| is applied in the damping pass below + const ruleCountMap = ruleIssueCountPerCategory.get(category)!; + ruleCountMap.set(ruleId, (ruleCountMap.get(ruleId) ?? 0) + 1); + } + + // Compute weightedIssueCount with sqrt damping per rule (#226). + // Same rule triggered N times contributes |score| × sqrt(N) instead of |score| × N. + // Rationale: 79 raw-value issues represent one systemic problem, not 79× the difficulty. + // First few occurrences identify the problem; subsequent ones have diminishing impact. + // At low counts the damping is mild (sqrt(1)=1, sqrt(4)=2), preserving sensitivity for rare issues. + for (const category of CATEGORIES) { + const ruleCountMap = ruleIssueCountPerCategory.get(category)!; + const ruleScoreMap = ruleScorePerCategory.get(category)!; + let dampedWeight = 0; + for (const [ruleId, count] of ruleCountMap) { + const ruleScore = ruleScoreMap.get(ruleId) ?? 0; + dampedWeight += ruleScore * Math.sqrt(count); + } + categoryScores[category].weightedIssueCount = dampedWeight; } // Calculate percentage for each category based on density + diversity