diff --git a/.claude/commands/calibrate-loop.md b/.claude/commands/calibrate-loop.md index 9fadb295..a1accf7d 100644 --- a/.claude/commands/calibrate-loop.md +++ b/.claude/commands/calibrate-loop.md @@ -108,7 +108,13 @@ Proceed to Step 4. After the Gap Analyzer returns, **you** write the JSON to `$RUN_DIR/gaps.json`. -> **Note**: Discovery evidence from gap analysis is collected programmatically by the orchestrator during Step 4 (Evaluation). Do not manually append to `data/discovery-evidence.json`. +Then collect uncovered actionable gaps into discovery evidence (deterministic CLI — no LLM): + +```bash +npx canicode calibrate-collect-gap-evidence $RUN_DIR +``` + +This reads `gaps.json`, extracts gaps where `actionable: true` and `coveredByRule: null`, and appends them to `data/discovery-evidence.json` as `source: "gap-analysis"` entries. Append to `$RUN_DIR/activity.jsonl`: ```json diff --git a/src/agents/evidence-collector.test.ts b/src/agents/evidence-collector.test.ts index b2ced476..06fb3e15 100644 --- a/src/agents/evidence-collector.test.ts +++ b/src/agents/evidence-collector.test.ts @@ -336,7 +336,7 @@ describe("evidence-collector", () => { ], disPath); appendDiscoveryEvidence([ - { description: "gap2", category: "color", impact: "moderate", fixture: "fx2", timestamp: "t2", source: "gap-analysis" }, + { description: "gap2", category: "code-quality", impact: "moderate", fixture: "fx2", timestamp: "t2", source: "gap-analysis" }, ], disPath); const raw = JSON.parse(readFileSync(disPath, "utf-8")) as { entries: DiscoveryEvidenceEntry[] }; @@ -415,7 +415,7 @@ describe("evidence-collector", () => { ]), "utf-8"); appendDiscoveryEvidence([ - { description: "new", category: "color", impact: "easy", fixture: "fx2", timestamp: "t1", source: "gap-analysis" }, + { description: "new", category: "pixel-critical", impact: "easy", fixture: "fx2", timestamp: "t1", source: "gap-analysis" }, ], disPath); const raw = JSON.parse(readFileSync(disPath, "utf-8")) as { schemaVersion: 
number; entries: DiscoveryEvidenceEntry[] }; @@ -435,6 +435,24 @@ describe("evidence-collector", () => { expect(after).toBe(before); }); + it("warns on non-standard category", () => { + const spy = vi.spyOn(console, "warn").mockImplementation(() => {}); + appendDiscoveryEvidence([ + { description: "gap1", category: "old-structure", impact: "hard", fixture: "fx1", timestamp: "t1", source: "evaluation" }, + ], disPath); + expect(spy).toHaveBeenCalledWith(expect.stringContaining('Non-standard category "old-structure"')); + spy.mockRestore(); + }); + + it("does not warn on standard category", () => { + const spy = vi.spyOn(console, "warn").mockImplementation(() => {}); + appendDiscoveryEvidence([ + { description: "gap1", category: "pixel-critical", impact: "hard", fixture: "fx1", timestamp: "t1", source: "evaluation" }, + ], disPath); + expect(spy).not.toHaveBeenCalled(); + spy.mockRestore(); + }); + it("throws when file has unsupported schemaVersion", () => { const file = { schemaVersion: 999, entries: [] }; writeFileSync(disPath, JSON.stringify(file), "utf-8"); @@ -454,14 +472,14 @@ describe("evidence-collector", () => { appendDiscoveryEvidence([ { description: "gap1", category: "Pixel-critical", impact: "hard", fixture: "fx1", timestamp: "t1", source: "evaluation" }, { description: "gap2", category: "pixel-critical", impact: "hard", fixture: "fx2", timestamp: "t2", source: "gap-analysis" }, - { description: "gap3", category: "color", impact: "moderate", fixture: "fx1", timestamp: "t1", source: "evaluation" }, + { description: "gap3", category: "token-management", impact: "moderate", fixture: "fx1", timestamp: "t1", source: "evaluation" }, ], disPath); pruneDiscoveryEvidence(["pixel-critical"], disPath); const raw = JSON.parse(readFileSync(disPath, "utf-8")) as { entries: DiscoveryEvidenceEntry[] }; expect(raw.entries).toHaveLength(1); - expect(raw.entries[0]!.category).toBe("color"); + expect(raw.entries[0]!.category).toBe("token-management"); }); it("writes 
versioned format after prune", () => { diff --git a/src/agents/evidence-collector.ts b/src/agents/evidence-collector.ts index 5f9e2d2f..24aef148 100644 --- a/src/agents/evidence-collector.ts +++ b/src/agents/evidence-collector.ts @@ -6,6 +6,7 @@ import { DiscoveryEvidenceFileSchema, DISCOVERY_EVIDENCE_SCHEMA_VERSION, } from "./contracts/evidence.js"; +import { CategorySchema } from "../core/contracts/category.js"; import type { CalibrationEvidenceEntry, CrossRunEvidence, @@ -293,6 +294,15 @@ export function appendDiscoveryEvidence( evidencePath: string = DEFAULT_DISCOVERY_PATH ): void { if (entries.length === 0) return; + + // Warn on non-standard categories (safety net for converter typos/old labels) + for (const e of entries) { + const parsed = CategorySchema.safeParse(e.category); + if (!parsed.success) { + console.warn(`[evidence] Non-standard category "${e.category}" in discovery evidence (expected: ${CategorySchema.options.join(", ")})`); + } + } + const existing = readDiscoveryEvidence(evidencePath); // Build map of existing entries keyed by dedupe key diff --git a/src/cli/commands/internal/calibrate-debate.ts b/src/cli/commands/internal/calibrate-debate.ts index 7a228e29..ddd94ce7 100644 --- a/src/cli/commands/internal/calibrate-debate.ts +++ b/src/cli/commands/internal/calibrate-debate.ts @@ -1,9 +1,10 @@ import { existsSync, readFileSync, writeFileSync } from "node:fs"; -import { join, resolve } from "node:path"; +import { join } from "node:path"; import type { CAC } from "cac"; import { parseDebateResult } from "../../../agents/run-directory.js"; import { loadCalibrationEvidence } from "../../../agents/evidence-collector.js"; +import { resolveRunDir } from "./cli-helpers.js"; // ─── calibrate-gather-evidence ────────────────────────────────────────────── @@ -103,11 +104,8 @@ export function registerGatherEvidence(cli: CAC): void { "Gather structured evidence for Critic from run artifacts + cross-run data" ) .action((runDir: string) => { - const dir = 
resolve(runDir); - if (!existsSync(dir)) { - console.log(`Run directory not found: ${runDir}`); - return; - } + const dir = resolveRunDir(runDir); + if (!dir) return; const proposedRuleIds = loadProposedRuleIds(dir); const evidence = gatherEvidence(dir, proposedRuleIds); @@ -134,11 +132,8 @@ export function registerFinalizeDebate(cli: CAC): void { "Check early-stop or determine stoppingReason after debate" ) .action((runDir: string) => { - const dir = resolve(runDir); - if (!existsSync(dir)) { - console.log(`Run directory not found: ${runDir}`); - return; - } + const dir = resolveRunDir(runDir); + if (!dir) return; const debate = parseDebateResult(dir); if (!debate) { diff --git a/src/cli/commands/internal/cli-helpers.ts b/src/cli/commands/internal/cli-helpers.ts new file mode 100644 index 00000000..f59dae8e --- /dev/null +++ b/src/cli/commands/internal/cli-helpers.ts @@ -0,0 +1,30 @@ +import { existsSync, statSync } from "node:fs"; +import { resolve } from "node:path"; +import { z } from "zod"; + +export const RUN_DIR_ARG_SCHEMA = z.string().trim().min(1, "runDir is required"); +export const KEYWORD_ARG_SCHEMA = z.string().trim().min(1, "keyword is required"); + +/** + * Validate and resolve a run directory path. + * Returns the resolved absolute path, or null if invalid/missing/not a directory. + * Logs to stdout and returns null on failure (internal CLI convention). 
+ */ +export function resolveRunDir(runDir: string): string | null { + const parsed = RUN_DIR_ARG_SCHEMA.safeParse(runDir); + if (!parsed.success) { + console.log(`Invalid runDir: ${parsed.error.issues[0]?.message}`); + return null; + } + const dir = resolve(parsed.data); + try { + if (!existsSync(dir) || !statSync(dir).isDirectory()) { + console.log(`Run directory not found or is not a directory: ${runDir}`); + return null; + } + } catch { + console.log(`Run directory not accessible: ${runDir}`); + return null; + } + return dir; +} diff --git a/src/cli/commands/internal/fixture-management.ts b/src/cli/commands/internal/fixture-management.ts index 85dfea03..12b73324 100644 --- a/src/cli/commands/internal/fixture-management.ts +++ b/src/cli/commands/internal/fixture-management.ts @@ -1,6 +1,6 @@ -import { existsSync } from "node:fs"; import { basename, resolve } from "node:path"; import type { CAC } from "cac"; +import { resolveRunDir } from "./cli-helpers.js"; import { listActiveFixtures, @@ -121,11 +121,8 @@ export function registerEvidenceEnrich(cli: CAC): void { "Enrich evidence with Critic's pro/con/confidence from debate.json" ) .action((runDir: string) => { - const resolvedDir = resolve(runDir); - if (!existsSync(resolvedDir)) { - console.log(`Run directory not found: ${runDir}`); - return; - } + const resolvedDir = resolveRunDir(runDir); + if (!resolvedDir) return; const debate = parseDebateResult(resolvedDir); if (!debate?.critic) { console.log("No critic reviews in debate.json — nothing to enrich."); @@ -161,11 +158,9 @@ export function registerEvidencePrune(cli: CAC): void { "Prune evidence for rules applied by the Arbitrator in the given run" ) .action((runDir: string) => { - if (!existsSync(resolve(runDir))) { - console.log(`Run directory not found: ${runDir}`); - return; - } - const debate = parseDebateResult(resolve(runDir)); + const resolvedDir = resolveRunDir(runDir); + if (!resolvedDir) return; + const debate = parseDebateResult(resolvedDir); if 
(!debate) { console.log("No debate.json found — nothing to prune."); return; } @@ -197,4 +192,4 @@ export function registerEvidencePrune(cli: CAC): void { process.exitCode = 1; } }); -} +} diff --git a/src/cli/commands/internal/rule-discovery.test.ts b/src/cli/commands/internal/rule-discovery.test.ts index 3e6d6a25..5bb9d811 100644 --- a/src/cli/commands/internal/rule-discovery.test.ts +++ b/src/cli/commands/internal/rule-discovery.test.ts @@ -3,7 +3,7 @@ import { join } from "node:path"; import { tmpdir } from "node:os"; import { rm } from "node:fs/promises"; -import { filterDiscoveryEvidence, readDecision } from "./rule-discovery.js"; +import { filterDiscoveryEvidence, readDecision, collectGapEvidence } from "./rule-discovery.js"; describe("filterDiscoveryEvidence", () => { it("returns empty array when no matching evidence exists", () => { @@ -118,3 +118,63 @@ describe("readDecision", () => { expect(readDecision(runDir)).toBeNull(); }); }); + +describe("collectGapEvidence", () => { + let runDir: string; + + beforeEach(() => { + runDir = mkdtempSync(join(tmpdir(), "gap-evidence-test-")); + }); + + afterEach(async () => { + await rm(runDir, { recursive: true, force: true }); + }); + + it("extracts uncovered actionable gaps", () => { + writeFileSync(join(runDir, "gaps.json"), JSON.stringify({ + gaps: [ + { category: "spacing", description: "padding off", actionable: true, coveredByRule: null }, + { category: "color", description: "wrong shade", actionable: true, coveredByRule: null }, + { category: "rendering", description: "font fallback", actionable: false }, + { category: "layout", description: "flex gap", actionable: true, coveredByRule: "no-auto-layout" }, + ], + })); + + const entries = collectGapEvidence(runDir, "test-fixture"); + expect(entries).toHaveLength(2); + expect(entries[0]!.category).toBe("spacing"); + expect(entries[0]!.source).toBe("gap-analysis"); + expect(entries[0]!.fixture).toBe("test-fixture"); +
expect(entries[1]!.category).toBe("color"); + }); + + it("returns empty for no gaps.json", () => { + expect(collectGapEvidence(runDir, "fx")).toHaveLength(0); + }); + + it("returns empty when all gaps are covered or non-actionable", () => { + writeFileSync(join(runDir, "gaps.json"), JSON.stringify({ + gaps: [ + { category: "spacing", description: "x", actionable: false }, + { category: "color", description: "y", actionable: true, coveredByRule: "raw-value" }, + ], + })); + + expect(collectGapEvidence(runDir, "fx")).toHaveLength(0); + }); + + it("skips actionable gap when coveredByRule is empty string", () => { + writeFileSync(join(runDir, "gaps.json"), JSON.stringify({ + gaps: [ + { category: "spacing", description: "x", actionable: true, coveredByRule: "" }, + ], + })); + + expect(collectGapEvidence(runDir, "fx")).toHaveLength(0); + }); + + it("returns empty for malformed gaps.json", () => { + writeFileSync(join(runDir, "gaps.json"), "not json"); + expect(collectGapEvidence(runDir, "fx")).toHaveLength(0); + }); +}); diff --git a/src/cli/commands/internal/rule-discovery.ts b/src/cli/commands/internal/rule-discovery.ts index 717379b5..a8c16081 100644 --- a/src/cli/commands/internal/rule-discovery.ts +++ b/src/cli/commands/internal/rule-discovery.ts @@ -1,9 +1,11 @@ -import { existsSync, readFileSync, statSync, writeFileSync } from "node:fs"; -import { join, resolve } from "node:path"; +import { existsSync, readFileSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; import type { CAC } from "cac"; -import { loadDiscoveryEvidence } from "../../../agents/evidence-collector.js"; +import { z } from "zod"; +import { loadDiscoveryEvidence, appendDiscoveryEvidence } from "../../../agents/evidence-collector.js"; import type { DiscoveryEvidenceEntry } from "../../../agents/evidence-collector.js"; import { DecisionFileSchema } from "../../../agents/contracts/evidence.js"; +import { resolveRunDir, KEYWORD_ARG_SCHEMA } from "./cli-helpers.js"; // ─── 
discovery-filter-evidence ────────────────────────────────────────────── @@ -39,15 +41,14 @@ export function registerFilterDiscoveryEvidence(cli: CAC): void { ) .option("--run-dir ", "Write filtered evidence to run directory") .action((keyword: string, options: { runDir?: string }) => { + const kParsed = KEYWORD_ARG_SCHEMA.safeParse(keyword); + if (!kParsed.success) { console.log(`Invalid keyword: ${kParsed.error.issues[0]?.message}`); return; } try { - const filtered = filterDiscoveryEvidence(keyword); + const filtered = filterDiscoveryEvidence(kParsed.data); if (options.runDir) { - const dir = resolve(options.runDir); - if (!existsSync(dir) || !statSync(dir).isDirectory()) { - console.log(`Run directory not found or is not a directory: ${options.runDir}`); - return; - } + const dir = resolveRunDir(options.runDir); + if (!dir) return; const outPath = join(dir, "prior-evidence.json"); writeFileSync(outPath, JSON.stringify(filtered, null, 2) + "\n", "utf-8"); console.log(`Filtered ${filtered.length} entries for "${keyword}" → ${outPath}`); @@ -110,11 +111,8 @@ export function registerApplyDecision(cli: CAC): void { "Read decision.json and output the action (commit/revert/adjust)" ) .action((runDir: string) => { - const dir = resolve(runDir); - if (!existsSync(dir) || !statSync(dir).isDirectory()) { - console.log(`Run directory not found or is not a directory: ${runDir}`); - return; - } + const dir = resolveRunDir(runDir); + if (!dir) return; const result = readDecision(dir); if (!result) { @@ -125,3 +123,86 @@ export function registerApplyDecision(cli: CAC): void { console.log(JSON.stringify(result)); }); } + +// ─── calibrate-collect-gap-evidence ───────────────────────────────────────── + +const GapSchema = z.object({ + category: z.string(), + description: z.string(), + actionable: z.boolean(), + coveredByRule: z.unknown().default(null), +}).passthrough(); + +const GapsFileSchema = z.object({ + fixture: z.string().optional(), + gaps: z.array(GapSchema), 
+}).passthrough(); + +/** + * Extract uncovered actionable gaps from gaps.json and append to discovery evidence. + * Deterministic — no LLM needed. + */ +export function collectGapEvidence(runDir: string, fixture: string): DiscoveryEvidenceEntry[] { + const gapsPath = join(runDir, "gaps.json"); + if (!existsSync(gapsPath)) return []; + + let raw: unknown; + try { + raw = JSON.parse(readFileSync(gapsPath, "utf-8")); + } catch { + return []; + } + const parsed = GapsFileSchema.safeParse(raw); + if (!parsed.success) return []; + + const timestamp = new Date().toISOString(); + const entries: DiscoveryEvidenceEntry[] = []; + + for (const gap of parsed.data.gaps) { + // Only actionable gaps not covered by existing rules + if (!gap.actionable) continue; + // Skip when coveredByRule is present (non-nullish); empty string counts as "marked covered" + if (gap.coveredByRule != null) continue; + + entries.push({ + description: gap.description, + category: gap.category, + impact: "moderate", + fixture, + timestamp, + source: "gap-analysis", + }); + } + + return entries; +} + +export function registerCollectGapEvidence(cli: CAC): void { + cli + .command( + "calibrate-collect-gap-evidence <runDir>", + "Collect uncovered actionable gaps from gaps.json into discovery evidence" + ) + .action((runDir: string) => { + const dir = resolveRunDir(runDir); + if (!dir) return; + + // Extract fixture name from run dir + const dirName = dir.split(/[/\\]/).pop() ?? ""; + const idx = dirName.lastIndexOf("--"); + const fixture = idx === -1 ? dirName : dirName.slice(0, idx); + + try { + const entries = collectGapEvidence(dir, fixture); + if (entries.length === 0) { + console.log("No uncovered actionable gaps found"); + return; + } + + appendDiscoveryEvidence(entries); + console.log(`Collected ${entries.length} gap evidence entries for fixture "${fixture}"`); + } catch (err) { + console.log(`Failed to collect gap evidence: ${err instanceof Error ?
err.message : String(err)}`); + } + }); +} diff --git a/src/cli/index.ts b/src/cli/index.ts index 940ac521..b29b7203 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -33,7 +33,7 @@ import { registerCalibrateEvaluate } from "./commands/internal/calibrate-evaluat import { registerCalibrateGapReport } from "./commands/internal/calibrate-gap-report.js"; import { registerCalibrateRun } from "./commands/internal/calibrate-run.js"; import { registerGatherEvidence, registerFinalizeDebate } from "./commands/internal/calibrate-debate.js"; -import { registerFilterDiscoveryEvidence, registerApplyDecision } from "./commands/internal/rule-discovery.js"; +import { registerFilterDiscoveryEvidence, registerApplyDecision, registerCollectGapEvidence } from "./commands/internal/rule-discovery.js"; import { registerFixtureManagement, registerEvidenceEnrich, registerEvidencePrune } from "./commands/internal/fixture-management.js"; const require = createRequire(import.meta.url); @@ -87,6 +87,7 @@ registerEvidenceEnrich(cli); registerEvidencePrune(cli); registerFilterDiscoveryEvidence(cli); registerApplyDecision(cli); +registerCollectGapEvidence(cli); // ============================================ // Documentation command