let-sunny · let-sunny · Mar 29, 2026 · Mar 29, 2026 · Mar 29, 2026 · Mar 29, 2026
diff --git a/.claude/commands/calibrate-loop.md b/.claude/commands/calibrate-loop.md
@@ -108,7 +108,13 @@ Proceed to Step 4.
 
 After the Gap Analyzer returns, **you** write the JSON to `$RUN_DIR/gaps.json`.
 
-> **Note**: Discovery evidence from gap analysis is collected programmatically by the orchestrator during Step 4 (Evaluation). Do not manually append to `data/discovery-evidence.json`.
+Then collect uncovered actionable gaps into discovery evidence (deterministic CLI — no LLM):
+
+```bash
+npx canicode calibrate-collect-gap-evidence $RUN_DIR
+```
+
+This reads `gaps.json`, extracts gaps where `actionable: true` and `coveredByRule: null`, and appends them to `data/discovery-evidence.json` as `source: "gap-analysis"` entries.
 
 Append to `$RUN_DIR/activity.jsonl`:
 ```json

diff --git a/src/agents/evidence-collector.test.ts b/src/agents/evidence-collector.test.ts
@@ -336,7 +336,7 @@ describe("evidence-collector", () => {
       ], disPath);
 
       appendDiscoveryEvidence([
-        { description: "gap2", category: "color", impact: "moderate", fixture: "fx2", timestamp: "t2", source: "gap-analysis" },
+        { description: "gap2", category: "code-quality", impact: "moderate", fixture: "fx2", timestamp: "t2", source: "gap-analysis" },
       ], disPath);
 
       const raw = JSON.parse(readFileSync(disPath, "utf-8")) as { entries: DiscoveryEvidenceEntry[] };
@@ -415,7 +415,7 @@ describe("evidence-collector", () => {
       ]), "utf-8");
 
       appendDiscoveryEvidence([
-        { description: "new", category: "color", impact: "easy", fixture: "fx2", timestamp: "t1", source: "gap-analysis" },
+        { description: "new", category: "pixel-critical", impact: "easy", fixture: "fx2", timestamp: "t1", source: "gap-analysis" },
       ], disPath);
 
       const raw = JSON.parse(readFileSync(disPath, "utf-8")) as { schemaVersion: number; entries: DiscoveryEvidenceEntry[] };
@@ -435,6 +435,24 @@ describe("evidence-collector", () => {
       expect(after).toBe(before);
     });
 
+    it("warns on non-standard category", () => {
+      // Mute console.warn and record its calls so the warning text can be asserted.
+      const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
+      appendDiscoveryEvidence([{ description: "gap1", category: "old-structure", impact: "hard", fixture: "fx1", timestamp: "t1", source: "evaluation" }], disPath);
+      const expectedFragment = 'Non-standard category "old-structure"';
+      expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining(expectedFragment));
+      warnSpy.mockRestore();
+    });
+
+    it("does not warn on standard category", () => {
+      // Mute console.warn and record its calls; appending an entry whose
+      // category is "pixel-critical" is expected to leave the spy uncalled.
+      const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
+      appendDiscoveryEvidence([{ description: "gap1", category: "pixel-critical", impact: "hard", fixture: "fx1", timestamp: "t1", source: "evaluation" }], disPath);
+      expect(warnSpy).not.toHaveBeenCalled();
+      warnSpy.mockRestore();
+    });
+
     it("throws when file has unsupported schemaVersion", () => {
       const file = { schemaVersion: 999, entries: [] };
       writeFileSync(disPath, JSON.stringify(file), "utf-8");
@@ -454,14 +472,14 @@ describe("evidence-collector", () => {
       appendDiscoveryEvidence([
         { description: "gap1", category: "Pixel-critical", impact: "hard", fixture: "fx1", timestamp: "t1", source: "evaluation" },
         { description: "gap2", category: "pixel-critical", impact: "hard", fixture: "fx2", timestamp: "t2", source: "gap-analysis" },
-        { description: "gap3", category: "color", impact: "moderate", fixture: "fx1", timestamp: "t1", source: "evaluation" },
+        { description: "gap3", category: "token-management", impact: "moderate", fixture: "fx1", timestamp: "t1", source: "evaluation" },
       ], disPath);
 
       pruneDiscoveryEvidence(["pixel-critical"], disPath);
 
       const raw = JSON.parse(readFileSync(disPath, "utf-8")) as { entries: DiscoveryEvidenceEntry[] };
       expect(raw.entries).toHaveLength(1);
-      expect(raw.entries[0]!.category).toBe("color");
+      expect(raw.entries[0]!.category).toBe("token-management");
     });
 
     it("writes versioned format after prune", () => {

diff --git a/src/agents/evidence-collector.ts b/src/agents/evidence-collector.ts
@@ -6,6 +6,7 @@ import {
   DiscoveryEvidenceFileSchema,
   DISCOVERY_EVIDENCE_SCHEMA_VERSION,
 } from "./contracts/evidence.js";
+import { CategorySchema } from "../core/contracts/category.js";
 import type {
   CalibrationEvidenceEntry,
   CrossRunEvidence,
@@ -293,6 +294,15 @@ export function appendDiscoveryEvidence(
   evidencePath: string = DEFAULT_DISCOVERY_PATH
 ): void {
   if (entries.length === 0) return;
+
+  // Warn on non-standard categories (safety net for converter typos/old labels)
+  for (const e of entries) {
+    const parsed = CategorySchema.safeParse(e.category);
+    if (!parsed.success) {
+      console.warn(`[evidence] Non-standard category "${e.category}" in discovery evidence (expected: ${CategorySchema.options.join(", ")})`);
+    }
+  }
+
   const existing = readDiscoveryEvidence(evidencePath);
 
   // Build map of existing entries keyed by dedupe key

diff --git a/src/cli/commands/internal/calibrate-debate.ts b/src/cli/commands/internal/calibrate-debate.ts
@@ -1,9 +1,10 @@
 import { existsSync, readFileSync, writeFileSync } from "node:fs";
-import { join, resolve } from "node:path";
+import { join } from "node:path";
 import type { CAC } from "cac";
 
 import { parseDebateResult } from "../../../agents/run-directory.js";
 import { loadCalibrationEvidence } from "../../../agents/evidence-collector.js";
+import { resolveRunDir } from "./cli-helpers.js";
 
 // ─── calibrate-gather-evidence ──────────────────────────────────────────────
 
@@ -103,11 +104,8 @@ export function registerGatherEvidence(cli: CAC): void {
       "Gather structured evidence for Critic from run artifacts + cross-run data"
     )
     .action((runDir: string) => {
-      const dir = resolve(runDir);
-      if (!existsSync(dir)) {
-        console.log(`Run directory not found: ${runDir}`);
-        return;
-      }
+      const dir = resolveRunDir(runDir);
+      if (!dir) return;
 
       const proposedRuleIds = loadProposedRuleIds(dir);
       const evidence = gatherEvidence(dir, proposedRuleIds);
@@ -134,11 +132,8 @@ export function registerFinalizeDebate(cli: CAC): void {
       "Check early-stop or determine stoppingReason after debate"
     )
     .action((runDir: string) => {
-      const dir = resolve(runDir);
-      if (!existsSync(dir)) {
-        console.log(`Run directory not found: ${runDir}`);
-        return;
-      }
+      const dir = resolveRunDir(runDir);
+      if (!dir) return;
 
       const debate = parseDebateResult(dir);
       if (!debate) {

diff --git a/src/cli/commands/internal/cli-helpers.ts b/src/cli/commands/internal/cli-helpers.ts
@@ -0,0 +1,30 @@
+import { existsSync, statSync } from "node:fs";
+import { resolve } from "node:path";
+import { z } from "zod";
+
+export const RUN_DIR_ARG_SCHEMA = z.string().trim().min(1, "runDir is required"); // run directory argument: whitespace-trimmed, must be non-empty
+export const KEYWORD_ARG_SCHEMA = z.string().trim().min(1, "keyword is required"); // keyword argument: whitespace-trimmed, must be non-empty
+
+/**
+ * Resolve a run directory CLI argument to an absolute path.
+ * On any failure (blank argument, missing path, non-directory, stat error)
+ * the reason is printed to stdout and null is returned instead of throwing.
+ */
+export function resolveRunDir(runDir: string): string | null {
+  const validated = RUN_DIR_ARG_SCHEMA.safeParse(runDir);
+  if (!validated.success) {
+    const reason = validated.error.issues[0]?.message;
+    console.log(`Invalid runDir: ${reason}`);
+    return null;
+  }
+  const absolutePath = resolve(validated.data);
+  try {
+    const isDirectory = existsSync(absolutePath) && statSync(absolutePath).isDirectory();
+    if (isDirectory) return absolutePath;
+    console.log(`Run directory not found or is not a directory: ${runDir}`);
+  } catch {
+    // Anything thrown while checking the path is reported as "not accessible".
+    console.log(`Run directory not accessible: ${runDir}`);
+  }
+  return null;
+}
diff --git a/src/cli/commands/internal/fixture-management.ts b/src/cli/commands/internal/fixture-management.ts
@@ -1,6 +1,6 @@
-import { existsSync } from "node:fs";
 import { basename, resolve } from "node:path";
 import type { CAC } from "cac";
+import { resolveRunDir } from "./cli-helpers.js";
 
 import {
   listActiveFixtures,
@@ -121,11 +121,8 @@ export function registerEvidenceEnrich(cli: CAC): void {
       "Enrich evidence with Critic's pro/con/confidence from debate.json"
     )
     .action((runDir: string) => {
-      const resolvedDir = resolve(runDir);
-      if (!existsSync(resolvedDir)) {
-        console.log(`Run directory not found: ${runDir}`);
-        return;
-      }
+      const resolvedDir = resolveRunDir(runDir);
+      if (!resolvedDir) return;
       const debate = parseDebateResult(resolvedDir);
       if (!debate?.critic) {
         console.log("No critic reviews in debate.json — nothing to enrich.");
@@ -161,11 +158,9 @@ export function registerEvidencePrune(cli: CAC): void {
       "Prune evidence for rules applied by the Arbitrator in the given run"
     )
     .action((runDir: string) => {
-      if (!existsSync(resolve(runDir))) {
-        console.log(`Run directory not found: ${runDir}`);
-        return;
-      }
-      const debate = parseDebateResult(resolve(runDir));
+      const resolvedDir = resolveRunDir(runDir);
+      if (!resolvedDir) return;
+      const debate = parseDebateResult(resolvedDir);
       if (!debate) {
         console.log("No debate.json found — nothing to prune.");
         return;
@@ -197,4 +192,4 @@ export function registerEvidencePrune(cli: CAC): void {
         process.exitCode = 1;
       }
     });
-}
+}
diff --git a/src/cli/commands/internal/rule-discovery.test.ts b/src/cli/commands/internal/rule-discovery.test.ts
@@ -3,7 +3,7 @@ import { join } from "node:path";
 import { tmpdir } from "node:os";
 import { rm } from "node:fs/promises";
 
-import { filterDiscoveryEvidence, readDecision } from "./rule-discovery.js";
+import { filterDiscoveryEvidence, readDecision, collectGapEvidence } from "./rule-discovery.js";
 
 describe("filterDiscoveryEvidence", () => {
   it("returns empty array when no matching evidence exists", () => {
@@ -118,3 +118,63 @@ describe("readDecision", () => {
     expect(readDecision(runDir)).toBeNull();
   });
 });
+
+describe("collectGapEvidence", () => {
+  let runDir: string;
+
+  /** Write `{ gaps }` as JSON to `gaps.json` inside the current run directory. */
+  const writeGaps = (gaps: unknown[]): void => {
+    writeFileSync(join(runDir, "gaps.json"), JSON.stringify({ gaps }));
+  };
+
+  // Each test runs against its own freshly created temp directory.
+  beforeEach(() => {
+    runDir = mkdtempSync(join(tmpdir(), "gap-evidence-test-"));
+  });
+
+  afterEach(async () => {
+    await rm(runDir, { recursive: true, force: true });
+  });
+
+  it("extracts uncovered actionable gaps", () => {
+    writeGaps([
+      { category: "spacing", description: "padding off", actionable: true, coveredByRule: null },
+      { category: "color", description: "wrong shade", actionable: true, coveredByRule: null },
+      { category: "rendering", description: "font fallback", actionable: false },
+      { category: "layout", description: "flex gap", actionable: true, coveredByRule: "no-auto-layout" },
+    ]);
+
+    const collected = collectGapEvidence(runDir, "test-fixture");
+    expect(collected).toHaveLength(2);
+    expect(collected[0]!.category).toBe("spacing");
+    expect(collected[0]!.source).toBe("gap-analysis");
+    expect(collected[0]!.fixture).toBe("test-fixture");
+    expect(collected[1]!.category).toBe("color");
+  });
+
+  it("returns empty for no gaps.json", () => {
+    expect(collectGapEvidence(runDir, "fx")).toHaveLength(0);
+  });
+
+  it("returns empty when all gaps are covered or non-actionable", () => {
+    writeGaps([
+      { category: "spacing", description: "x", actionable: false },
+      { category: "color", description: "y", actionable: true, coveredByRule: "raw-value" },
+    ]);
+
+    expect(collectGapEvidence(runDir, "fx")).toHaveLength(0);
+  });
+
+  it("skips actionable gap when coveredByRule is empty string", () => {
+    writeGaps([
+      { category: "spacing", description: "x", actionable: true, coveredByRule: "" },
+    ]);
+
+    expect(collectGapEvidence(runDir, "fx")).toHaveLength(0);
+  });
+
+  it("returns empty for malformed gaps.json", () => {
+    writeFileSync(join(runDir, "gaps.json"), "not json");
+    expect(collectGapEvidence(runDir, "fx")).toHaveLength(0);
+  });
+});