let-sunny · let-sunny · Mar 30, 2026 · Mar 30, 2026 · Mar 30, 2026
diff --git a/.claude/agents/calibration/converter.md b/.claude/agents/calibration/converter.md
@@ -132,7 +132,24 @@ Read and follow `.claude/skills/design-to-code/PROMPT.md` for all code generatio
     f. **CSS metrics** (same rules as step 9 — count only inside `<style>`):  
        - `baselineCssClassCount` / `baselineCssVariableCount` from `$RUN_DIR/output.html`  
        - `strippedCssClassCount` / `strippedCssVariableCount` from `$RUN_DIR/stripped/<strip-type>.html`
-    g. **Responsive similarity (optional, primarily for `size-constraints`):** If step 6 ran a responsive comparison (two+ fixture screenshots), run the same comparison for this strip HTML: same `--figma-screenshot`, `--width`, and `--output` pattern as baseline responsive, against `$RUN_DIR/stripped/<strip-type>.html`. Record `baselineResponsiveSimilarity` / `strippedResponsiveSimilarity` / `responsiveViewport` from baseline vs strip outputs; `responsiveDelta` = `baselineResponsiveSimilarity - strippedResponsiveSimilarity` (percentage points). If step 6 skipped responsive, set `baselineResponsiveSimilarity`, `strippedResponsiveSimilarity`, `responsiveDelta`, and `responsiveViewport` to `null`.
+    g. **Responsive similarity at the expanded viewport** (same screenshot + width as step 6):
+
+       If step 6 **skipped** the responsive comparison (only one fixture screenshot): set `baselineResponsiveSimilarity`, `strippedResponsiveSimilarity`, `responsiveDelta`, and `responsiveViewport` to `null` on **every** strip row.
+
+       If step 6 **ran** the responsive comparison: reuse the same `LARGEST` screenshot path and `LARGEST_WIDTH` variables from step 6.
+
+       - **`size-constraints` (required):** Run visual-compare on the stripped HTML at the expanded viewport so missing size info shows up where it actually breaks (not only at design width):
+
+         ```bash
+         npx canicode visual-compare $RUN_DIR/stripped/size-constraints.html \
+           --figma-screenshot "$LARGEST" \
+           --width "$LARGEST_WIDTH" \
+           --output $RUN_DIR/stripped/size-constraints-responsive
+         ```
+
+         Record the `similarity` field from the command's JSON stdout as **`strippedResponsiveSimilarity`**. Set **`baselineResponsiveSimilarity`** to the root conversion field **`responsiveSimilarity`** from step 6 (baseline `output.html` at the same viewport — already measured, so do not re-run it). Set **`responsiveViewport`** to `LARGEST_WIDTH` (as a number). Set **`responsiveDelta`** = `baselineResponsiveSimilarity - strippedResponsiveSimilarity` (percentage points).
+
+       - **Other strip types:** Optional — same command pattern with `$RUN_DIR/stripped/<strip-type>.html` and a distinct `--output` directory if you want responsive rows for reporting; otherwise set the four responsive fields to `null`.
 
     **Derived fields (every strip row):**
 

diff --git a/.claude/agents/calibration/critic.md b/.claude/agents/calibration/critic.md
@@ -35,7 +35,7 @@ Use ALL inputs to form pro/con arguments. Do not rely on proposals alone.
 When `stripDeltas` are present, they provide the **most reliable** difficulty signal **for rules where the evaluator applies strip ablation override** (see `STRIP_TYPE_RULES` in `src/agents/evaluation-agent.ts`):
 - They measure objective degradation when info is removed (not AI self-assessment): **pixel delta** (`stripDeltaToDifficulty`) for `layout-direction-spacing`; **difficulty from baseline vs stripped input-token ratio** (`tokenDeltaToDifficulty` — relative % savings, not absolute token drop) for `component-references`, `node-names-hierarchy`, `variable-references`, `style-references`; **responsive similarity delta** for `size-constraints` when that metric is recorded
 - Strip-to-rule mapping (calibration): `layout-direction-spacing` → `no-auto-layout`, `absolute-position-in-auto-layout`, `non-layout-container`, `irregular-spacing`; `size-constraints` → `missing-size-constraint`, `fixed-size-in-auto-layout`; `component-references` → `missing-component`, `detached-instance`, `variant-structure-mismatch`; `node-names-hierarchy` → `non-standard-naming`, `non-semantic-name`, `inconsistent-naming-convention`; `variable-references` / `style-references` → `raw-value`
-- **Responsive-critical** rules (`missing-size-constraint`, `fixed-size-in-auto-layout`): the evaluator currently **skips** the strip ablation override pass for this category (so design-viewport pixel fallback on `size-constraints` cannot erase baseline responsive classification). Treat **`stripDeltas["size-constraints"]` as narrative context only** for these rules until per-strip responsive metrics are wired (#205); **trust baseline page `responsiveDelta`** (and the evaluation output) over Converter opinion — but **do not** treat strip rows as having overridden that path.
+- **Responsive-critical** rules (`missing-size-constraint`, `fixed-size-in-auto-layout`): if `stripDeltas["size-constraints"].responsiveDelta` is **absent, `null`, or non-finite** (`NaN`, `±Infinity`), the evaluator **skips** the strip override for this category and the classification from the baseline page `responsiveDelta` stands. When that field is a **finite number** (expanded-viewport compare per `converter.md` / #205), the evaluator **may apply** strip ablation for these rules using `stripDeltaToDifficulty(responsiveDelta)` on the strip row. The strip pass runs after the baseline responsive pass, so when it applies, the strip-derived difficulty replaces the baseline one; prefer that strip signal over Converter's assessment when they conflict.
 - For **all other rules** that have a strip mapping, **prefer the strip-derived difficulty (pixel or token ratio)** over Converter's `ruleImpactAssessment` when they conflict — the strip metric is what the evaluator uses for those rules.
 - Higher delta (for the metric that applies to that strip family) = removing that info hurt more = rule is more important
 

diff --git a/src/agents/evaluation-agent.test.ts b/src/agents/evaluation-agent.test.ts
@@ -485,14 +485,14 @@ describe("runEvaluationAgent", () => {
     expect(layoutMatch!.actualDifficulty).toBe("moderate");
     expect(layoutMatch!.reasoning).toContain("strip-ablation");
 
-    // fixed-size-in-auto-layout: responsive delta applied; strip pass skips responsive-critical (#208 review)
+    // fixed-size-in-auto-layout: responsive delta applied; strip pass skips (no finite size-constraints responsiveDelta)
     const responsiveMatch = result.mismatches.find(m => m.ruleId === "fixed-size-in-auto-layout");
     expect(responsiveMatch).toBeDefined();
     expect(responsiveMatch!.actualDifficulty).toBe("hard");
     expect(responsiveMatch!.reasoning).toContain("responsive");
   });
 
-  it("does not let strip ablation override responsive-critical rules (avoids pixel fallback on size-constraints)", () => {
+  it("does not let strip ablation override responsive-critical when size-constraints responsiveDelta is missing", () => {
     const input: EvaluationAgentInput = {
       nodeIssueSummaries: [
         { nodeId: "node-1", nodePath: "Page > Frame", flaggedRuleIds: ["missing-size-constraint"] },
@@ -533,6 +533,86 @@ describe("runEvaluationAgent", () => {
     expect(match!.reasoning).not.toContain("strip-ablation");
   });
 
+  it("applies strip ablation to responsive-critical when size-constraints has finite responsiveDelta (#205)", () => {
+    const input: EvaluationAgentInput = {
+      nodeIssueSummaries: [
+        { nodeId: "node-1", nodePath: "Page > Frame", flaggedRuleIds: ["missing-size-constraint"] },
+      ],
+      conversionRecords: [
+        {
+          nodeId: "node-1",
+          nodePath: "Page > Frame",
+          difficulty: "easy",
+          ruleRelatedStruggles: [
+            { ruleId: "missing-size-constraint", description: "Fine", actualImpact: "easy" },
+          ],
+          uncoveredStruggles: [],
+        },
+      ],
+      ruleScores: {
+        "missing-size-constraint": { score: -2, severity: "suggestion" },
+      },
+      responsiveDelta: 25, // baseline page-level delta; on its own this classifies the rule as "hard"
+      stripDeltas: {
+        "size-constraints": {
+          pixelDelta: 2,
+          responsiveDelta: 14, // finite, so the strip override is allowed to run for this rule
+          baselineInputTokens: null,
+          strippedInputTokens: null,
+        },
+      },
+    };
+
+    const result = runEvaluationAgent(input);
+
+    const match = result.mismatches.find(m => m.ruleId === "missing-size-constraint");
+    expect(match).toBeDefined();
+    // Baseline responsive → hard; strip responsive delta 14%p → moderate (stripDeltaToDifficulty)
+    expect(match!.actualDifficulty).toBe("moderate");
+    expect(match!.reasoning).toContain("strip-ablation"); // reasoning must show the strip pass took effect
+  });
+
+  it.each([Number.NaN, Number.POSITIVE_INFINITY, Number.NEGATIVE_INFINITY] as const)(
+    "does not apply strip override for responsive-critical when size-constraints responsiveDelta is non-finite (%p)",
+    (nonFiniteResponsiveDelta) => {
+      const input: EvaluationAgentInput = {
+        nodeIssueSummaries: [
+          { nodeId: "node-1", nodePath: "Page > Frame", flaggedRuleIds: ["missing-size-constraint"] },
+        ],
+        conversionRecords: [
+          {
+            nodeId: "node-1",
+            nodePath: "Page > Frame",
+            difficulty: "easy",
+            ruleRelatedStruggles: [
+              { ruleId: "missing-size-constraint", description: "Fine", actualImpact: "easy" },
+            ],
+            uncoveredStruggles: [],
+          },
+        ],
+        ruleScores: {
+          "missing-size-constraint": { score: -2, severity: "suggestion" },
+        },
+        responsiveDelta: 25, // baseline page-level delta; expected to decide the outcome in this test
+        stripDeltas: {
+          "size-constraints": {
+            pixelDelta: 2,
+            responsiveDelta: nonFiniteResponsiveDelta, // NaN or ±Infinity from the it.each table above
+            baselineInputTokens: null,
+            strippedInputTokens: null,
+          },
+        },
+      };
+
+      const result = runEvaluationAgent(input);
+      const match = result.mismatches.find(m => m.ruleId === "missing-size-constraint");
+      expect(match).toBeDefined();
+      expect(match!.actualDifficulty).toBe("hard"); // baseline responsive classification stands
+      expect(match!.reasoning).toContain("responsive");
+      expect(match!.reasoning).not.toContain("strip-ablation"); // strip pass must have been skipped
+    },
+  );
+
   it("merges all nodeIssueSummaries when wholeDesign is true", () => {
     const input: EvaluationAgentInput = {
       nodeIssueSummaries: [

diff --git a/src/agents/evaluation-agent.ts b/src/agents/evaluation-agent.ts
@@ -222,12 +222,17 @@ export function runEvaluationAgent(
   if (input.stripDeltas) {
     for (const mismatch of mismatches) {
       if (!mismatch.ruleId) continue;
-      // Baseline responsiveDelta already classified responsive-critical rules. Do not override
-      // with strip metrics: size-constraints strip may fall back to design-viewport pixel delta
-      // when per-strip responsive compare is not wired (#205).
+      // Responsive-critical: only apply strip override when size-constraints has a measured
+      // responsive delta (#205). Otherwise skip so design-viewport pixel fallback cannot replace
+      // baseline page responsiveDelta classification.
       if (mismatch.ruleId in RULE_ID_CATEGORY) {
         const category = RULE_ID_CATEGORY[mismatch.ruleId as RuleId];
-        if (category === "responsive-critical") continue;
+        if (
+          category === "responsive-critical" &&
+          !sizeConstraintsStripHasResponsiveMetric(input.stripDeltas)
+        ) {
+          continue;
+        }
       }
       const stripDifficulty = getStripDifficultyForRule(mismatch.ruleId, input.stripDeltas);
       if (!stripDifficulty) continue;
@@ -257,6 +262,13 @@ export function runEvaluationAgent(
  * Map strip type to related rule IDs.
  * Based on what information each strip type removes and which rules detect those issues.
  */
+function sizeConstraintsStripHasResponsiveMetric(
+  stripDeltas: Record<string, StripDeltaForEval>,
+): boolean { // NOTE(review): the JSDoc directly above ("Map strip type to related rule IDs") describes STRIP_TYPE_RULES, not this helper.
+  const rd = stripDeltas["size-constraints"]?.responsiveDelta; // undefined when there is no "size-constraints" row
+  return rd != null && Number.isFinite(rd); // true only for a finite number: rejects null/undefined, NaN, and ±Infinity
+}
+
 const STRIP_TYPE_RULES: Record<DesignTreeInfoType, RuleId[]> = {
   "layout-direction-spacing": ["no-auto-layout", "absolute-position-in-auto-layout", "non-layout-container", "irregular-spacing"],
   "size-constraints": ["missing-size-constraint", "fixed-size-in-auto-layout"],