Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 20 additions & 9 deletions src/core/engine/scoring.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,35 +86,34 @@ describe("calculateScores", () => {
expect(scores.summary.totalIssues).toBe(4);
});

it("uses calculatedScore for density: higher score = more density impact", () => {
it("uses base rule score with sqrt damping for density (#226)", () => {
const heavyIssue = makeIssue({ ruleId: "no-auto-layout", category: "pixel-critical", severity: "blocking", score: -10 });
heavyIssue.calculatedScore = -15; // Simulate depthWeight effect
heavyIssue.calculatedScore = -15; // calculatedScore ignored for density — uses base score (-10), so |−10| × sqrt(1) = 10

const lightIssue = makeIssue({ ruleId: "non-semantic-name", category: "semantic", severity: "suggestion", score: -1 });
lightIssue.calculatedScore = -1;

const heavy = calculateScores(makeResult([heavyIssue], 100));
const light = calculateScores(makeResult([lightIssue], 100));

expect(heavy.byCategory["pixel-critical"].weightedIssueCount).toBe(15);
// sqrt(1) = 1, so 1 issue = base score directly
expect(heavy.byCategory["pixel-critical"].weightedIssueCount).toBe(10);
expect(light.byCategory["semantic"].weightedIssueCount).toBe(1);
});

it("differentiates rules within the same severity by score", () => {
it("differentiates rules within the same severity by base score", () => {
const highScoreIssue = makeIssue({ ruleId: "no-auto-layout", category: "pixel-critical", severity: "blocking", score: -10 });
highScoreIssue.calculatedScore = -15;

const lowScoreIssue = makeIssue({ ruleId: "absolute-position-in-auto-layout", category: "pixel-critical", severity: "blocking", score: -3 });
lowScoreIssue.calculatedScore = -5;

const highScore = calculateScores(makeResult([highScoreIssue], 100));
const lowScore = calculateScores(makeResult([lowScoreIssue], 100));

expect(highScore.byCategory["pixel-critical"].densityScore).toBeLessThan(
lowScore.byCategory["pixel-critical"].densityScore
);
expect(highScore.byCategory["pixel-critical"].weightedIssueCount).toBe(15);
expect(lowScore.byCategory["pixel-critical"].weightedIssueCount).toBe(5);
// sqrt(1) = 1, so single issue = base score
expect(highScore.byCategory["pixel-critical"].weightedIssueCount).toBe(10);
expect(lowScore.byCategory["pixel-critical"].weightedIssueCount).toBe(3);
});

it("density score decreases as weighted issue count increases relative to node count", () => {
Expand All @@ -133,6 +132,18 @@ describe("calculateScores", () => {
expect(many.byCategory["pixel-critical"].densityScore).toBeLessThan(
few.byCategory["pixel-critical"].densityScore
);
// Verify sqrt damping: 5 issues of score -5 → 5 × sqrt(5) ≈ 11.18
expect(many.byCategory["pixel-critical"].weightedIssueCount).toBeCloseTo(5 * Math.sqrt(5), 1);
});

// Two different rules fire in the same category. Each rule's contribution is
// damped by the square root of its own occurrence count and the results are
// summed, so the expected weight is 5×sqrt(4) + 3×sqrt(9) = 10 + 9 = 19.
it("applies sqrt damping independently per rule", () => {
  const autoLayoutHits = Array.from({ length: 4 }, () =>
    makeIssue({ ruleId: "no-auto-layout", category: "pixel-critical", severity: "blocking", score: -5 }),
  );
  const containerHits = Array.from({ length: 9 }, () =>
    makeIssue({ ruleId: "non-layout-container", category: "pixel-critical", severity: "risk", score: -3 }),
  );

  const report = calculateScores(makeResult([...autoLayoutHits, ...containerHits], 100));
  const { weightedIssueCount } = report.byCategory["pixel-critical"];

  expect(weightedIssueCount).toBeCloseTo(19, 1);
});

it("diversity score penalizes more unique rules being triggered", () => {
Expand Down
40 changes: 31 additions & 9 deletions src/core/engine/scoring.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,17 @@ export interface ScoreReport {
export type Grade = "S" | "A+" | "A" | "B+" | "B" | "C+" | "C" | "D" | "F";

/**
* Density weighting now uses per-rule `calculatedScore` from the rule engine,
* which incorporates both the calibrated rule score and depthWeight.
* Density weighting uses per-rule base |score| with sqrt damping (#226).
*
* Previously, flat severity weights (blocking=3.0, risk=2.0, etc.) were used,
* making all rules within the same severity contribute equally and rendering
* the per-rule scores in rule-config.ts effectively unused.
* Previously, each issue's |calculatedScore| was summed linearly — a rule
* triggering N times contributed N× its score. This over-penalized designs
* with many instances of the same issue (e.g., raw-value ×79 = -316 weight),
* causing most real files to score Grade D/F with no differentiation.
*
* Now: `no-auto-layout` (score: -10, depthWeight: 1.5) at root contributes 15
* to density, while `non-semantic-name` (score: -4, no depthWeight) contributes 4.
* This makes calibration loop score adjustments flow through to user-facing scores.
* Now: same rule triggered N times contributes |score| × sqrt(N).
* raw-value ×79 = 4 × sqrt(79) ≈ 36 instead of 316.
* At low counts the damping is mild (sqrt(1)=1, sqrt(4)=2), preserving sensitivity.
* At high counts the curve flattens — 79th occurrence adds ~0.2 vs 1st adding 4.
*
* Category weights removed (#196) — overall score is simple average of categories.
* Category importance is already encoded in rule scores (pixel-critical -10
Expand Down Expand Up @@ -170,9 +171,12 @@ export function calculateScores(
// Track unique rules and their base |score| per category
const uniqueRulesPerCategory = new Map<Category, Set<string>>();
const ruleScorePerCategory = new Map<Category, Map<string, number>>();
// Track issue counts per rule per category for sqrt damping (#226)
const ruleIssueCountPerCategory = new Map<Category, Map<string, number>>();
for (const category of CATEGORIES) {
uniqueRulesPerCategory.set(category, new Set());
ruleScorePerCategory.set(category, new Map());
ruleIssueCountPerCategory.set(category, new Map());
}

// Compute totals from the config map.
Expand All @@ -189,9 +193,27 @@ export function calculateScores(

categoryScores[category].issueCount++;
categoryScores[category].bySeverity[severity]++;
categoryScores[category].weightedIssueCount += Math.abs(issue.calculatedScore);
uniqueRulesPerCategory.get(category)!.add(ruleId);
ruleScorePerCategory.get(category)!.set(ruleId, Math.abs(issue.config.score));
// Accumulate per-rule issue count (each issue counts as 1; the rule's base |score| is applied in the damping pass below)
const ruleCountMap = ruleIssueCountPerCategory.get(category)!;
ruleCountMap.set(ruleId, (ruleCountMap.get(ruleId) ?? 0) + 1);
}

// Compute weightedIssueCount with sqrt damping per rule (#226).
// Same rule triggered N times contributes |score| × sqrt(N) instead of |score| × N.
// Rationale: 79 raw-value issues represent one systemic problem, not 79× the difficulty.
// First few occurrences identify the problem; subsequent ones have diminishing impact.
// At low counts the damping is mild (sqrt(1)=1, sqrt(4)=2), preserving sensitivity for rare issues.
for (const category of CATEGORIES) {
const ruleCountMap = ruleIssueCountPerCategory.get(category)!;
const ruleScoreMap = ruleScorePerCategory.get(category)!;
let dampedWeight = 0;
for (const [ruleId, count] of ruleCountMap) {
const ruleScore = ruleScoreMap.get(ruleId) ?? 0;
dampedWeight += ruleScore * Math.sqrt(count);
}
categoryScores[category].weightedIssueCount = dampedWeight;
}

// Calculate percentage for each category based on density + diversity
Expand Down
Loading