Skip to content
Merged
8 changes: 7 additions & 1 deletion .claude/commands/calibrate-loop.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,13 @@ Proceed to Step 4.

After the Gap Analyzer returns, **you** write the JSON to `$RUN_DIR/gaps.json`.

> **Note**: Discovery evidence from gap analysis is collected programmatically by the orchestrator during Step 4 (Evaluation). Do not manually append to `data/discovery-evidence.json`.
Then collect uncovered actionable gaps into discovery evidence (deterministic CLI — no LLM):

```bash
npx canicode calibrate-collect-gap-evidence $RUN_DIR
```

This reads `gaps.json`, extracts gaps where `actionable: true` and `coveredByRule: null`, and appends them to `data/discovery-evidence.json` as `source: "gap-analysis"` entries.

Append to `$RUN_DIR/activity.jsonl`:
```json
Expand Down
26 changes: 22 additions & 4 deletions src/agents/evidence-collector.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ describe("evidence-collector", () => {
], disPath);

appendDiscoveryEvidence([
{ description: "gap2", category: "color", impact: "moderate", fixture: "fx2", timestamp: "t2", source: "gap-analysis" },
{ description: "gap2", category: "code-quality", impact: "moderate", fixture: "fx2", timestamp: "t2", source: "gap-analysis" },
], disPath);

const raw = JSON.parse(readFileSync(disPath, "utf-8")) as { entries: DiscoveryEvidenceEntry[] };
Expand Down Expand Up @@ -415,7 +415,7 @@ describe("evidence-collector", () => {
]), "utf-8");

appendDiscoveryEvidence([
{ description: "new", category: "color", impact: "easy", fixture: "fx2", timestamp: "t1", source: "gap-analysis" },
{ description: "new", category: "pixel-critical", impact: "easy", fixture: "fx2", timestamp: "t1", source: "gap-analysis" },
], disPath);

const raw = JSON.parse(readFileSync(disPath, "utf-8")) as { schemaVersion: number; entries: DiscoveryEvidenceEntry[] };
Expand All @@ -435,6 +435,24 @@ describe("evidence-collector", () => {
expect(after).toBe(before);
});

it("warns on non-standard category", () => {
  // Stub console.warn so the warning is captured instead of printed.
  const spy = vi.spyOn(console, "warn").mockImplementation(() => {});
  try {
    appendDiscoveryEvidence([
      { description: "gap1", category: "old-structure", impact: "hard", fixture: "fx1", timestamp: "t1", source: "evaluation" },
    ], disPath);
    expect(spy).toHaveBeenCalledWith(expect.stringContaining('Non-standard category "old-structure"'));
  } finally {
    // Restore even when the assertion throws, so the stub never leaks into later tests.
    spy.mockRestore();
  }
});

it("does not warn on standard category", () => {
  // Stub console.warn so any unexpected warning is recorded rather than printed.
  const spy = vi.spyOn(console, "warn").mockImplementation(() => {});
  try {
    appendDiscoveryEvidence([
      { description: "gap1", category: "pixel-critical", impact: "hard", fixture: "fx1", timestamp: "t1", source: "evaluation" },
    ], disPath);
    expect(spy).not.toHaveBeenCalled();
  } finally {
    // Restore even when the assertion throws, so the stub never leaks into later tests.
    spy.mockRestore();
  }
});

it("throws when file has unsupported schemaVersion", () => {
const file = { schemaVersion: 999, entries: [] };
writeFileSync(disPath, JSON.stringify(file), "utf-8");
Expand All @@ -454,14 +472,14 @@ describe("evidence-collector", () => {
appendDiscoveryEvidence([
{ description: "gap1", category: "Pixel-critical", impact: "hard", fixture: "fx1", timestamp: "t1", source: "evaluation" },
{ description: "gap2", category: "pixel-critical", impact: "hard", fixture: "fx2", timestamp: "t2", source: "gap-analysis" },
{ description: "gap3", category: "color", impact: "moderate", fixture: "fx1", timestamp: "t1", source: "evaluation" },
{ description: "gap3", category: "token-management", impact: "moderate", fixture: "fx1", timestamp: "t1", source: "evaluation" },
], disPath);

pruneDiscoveryEvidence(["pixel-critical"], disPath);

const raw = JSON.parse(readFileSync(disPath, "utf-8")) as { entries: DiscoveryEvidenceEntry[] };
expect(raw.entries).toHaveLength(1);
expect(raw.entries[0]!.category).toBe("color");
expect(raw.entries[0]!.category).toBe("token-management");
});

it("writes versioned format after prune", () => {
Expand Down
10 changes: 10 additions & 0 deletions src/agents/evidence-collector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import {
DiscoveryEvidenceFileSchema,
DISCOVERY_EVIDENCE_SCHEMA_VERSION,
} from "./contracts/evidence.js";
import { CategorySchema } from "../core/contracts/category.js";
import type {
CalibrationEvidenceEntry,
CrossRunEvidence,
Expand Down Expand Up @@ -293,6 +294,15 @@ export function appendDiscoveryEvidence(
evidencePath: string = DEFAULT_DISCOVERY_PATH
): void {
if (entries.length === 0) return;

// Warn on non-standard categories (safety net for converter typos/old labels)
for (const e of entries) {
const parsed = CategorySchema.safeParse(e.category);
if (!parsed.success) {
console.warn(`[evidence] Non-standard category "${e.category}" in discovery evidence (expected: ${CategorySchema.options.join(", ")})`);
}
}

const existing = readDiscoveryEvidence(evidencePath);

// Build map of existing entries keyed by dedupe key
Expand Down
17 changes: 6 additions & 11 deletions src/cli/commands/internal/calibrate-debate.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import { existsSync, readFileSync, writeFileSync } from "node:fs";
import { join, resolve } from "node:path";
import { join } from "node:path";
import type { CAC } from "cac";

import { parseDebateResult } from "../../../agents/run-directory.js";
import { loadCalibrationEvidence } from "../../../agents/evidence-collector.js";
import { resolveRunDir } from "./cli-helpers.js";

// ─── calibrate-gather-evidence ──────────────────────────────────────────────

Expand Down Expand Up @@ -103,11 +104,8 @@ export function registerGatherEvidence(cli: CAC): void {
"Gather structured evidence for Critic from run artifacts + cross-run data"
)
.action((runDir: string) => {
const dir = resolve(runDir);
if (!existsSync(dir)) {
console.log(`Run directory not found: ${runDir}`);
return;
}
const dir = resolveRunDir(runDir);
if (!dir) return;

const proposedRuleIds = loadProposedRuleIds(dir);
const evidence = gatherEvidence(dir, proposedRuleIds);
Expand All @@ -134,11 +132,8 @@ export function registerFinalizeDebate(cli: CAC): void {
"Check early-stop or determine stoppingReason after debate"
)
.action((runDir: string) => {
const dir = resolve(runDir);
if (!existsSync(dir)) {
console.log(`Run directory not found: ${runDir}`);
return;
}
const dir = resolveRunDir(runDir);
if (!dir) return;

const debate = parseDebateResult(dir);
if (!debate) {
Expand Down
30 changes: 30 additions & 0 deletions src/cli/commands/internal/cli-helpers.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import { existsSync, statSync } from "node:fs";
import { resolve } from "node:path";
import { z } from "zod";

/** Schema for a `runDir` CLI argument: a string that is trimmed and must then contain at least one character. */
export const RUN_DIR_ARG_SCHEMA = z.string().trim().min(1, "runDir is required");
/** Schema for a `keyword` CLI argument: a string that is trimmed and must then contain at least one character. */
export const KEYWORD_ARG_SCHEMA = z.string().trim().min(1, "keyword is required");

/**
 * Turn a run-directory CLI argument into an absolute path.
 *
 * The argument is validated against RUN_DIR_ARG_SCHEMA, resolved to an
 * absolute path, and then checked to exist on disk as a directory.
 *
 * @param runDir - Raw run directory argument as received from the CLI.
 * @returns The resolved absolute path, or null when the argument is invalid,
 *   the path is missing, is not a directory, or cannot be inspected. Each
 *   failure prints a message to stdout before returning null (internal CLI
 *   convention).
 */
export function resolveRunDir(runDir: string): string | null {
  const validation = RUN_DIR_ARG_SCHEMA.safeParse(runDir);
  if (!validation.success) {
    console.log(`Invalid runDir: ${validation.error.issues[0]?.message}`);
    return null;
  }

  const absolutePath = resolve(validation.data);

  let isDirectory: boolean;
  try {
    // statSync is only reached when the path exists; it may still throw (e.g. on a race or access error).
    isDirectory = existsSync(absolutePath) && statSync(absolutePath).isDirectory();
  } catch {
    console.log(`Run directory not accessible: ${runDir}`);
    return null;
  }

  if (!isDirectory) {
    console.log(`Run directory not found or is not a directory: ${runDir}`);
    return null;
  }
  return absolutePath;
}
19 changes: 7 additions & 12 deletions src/cli/commands/internal/fixture-management.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { existsSync } from "node:fs";
import { basename, resolve } from "node:path";
import type { CAC } from "cac";
import { resolveRunDir } from "./cli-helpers.js";

import {
listActiveFixtures,
Expand Down Expand Up @@ -121,11 +121,8 @@ export function registerEvidenceEnrich(cli: CAC): void {
"Enrich evidence with Critic's pro/con/confidence from debate.json"
)
.action((runDir: string) => {
const resolvedDir = resolve(runDir);
if (!existsSync(resolvedDir)) {
console.log(`Run directory not found: ${runDir}`);
return;
}
const resolvedDir = resolveRunDir(runDir);
if (!resolvedDir) return;
const debate = parseDebateResult(resolvedDir);
if (!debate?.critic) {
console.log("No critic reviews in debate.json — nothing to enrich.");
Expand Down Expand Up @@ -161,11 +158,9 @@ export function registerEvidencePrune(cli: CAC): void {
"Prune evidence for rules applied by the Arbitrator in the given run"
)
.action((runDir: string) => {
if (!existsSync(resolve(runDir))) {
console.log(`Run directory not found: ${runDir}`);
return;
}
const debate = parseDebateResult(resolve(runDir));
const resolvedDir = resolveRunDir(runDir);
if (!resolvedDir) return;
const debate = parseDebateResult(resolvedDir);
if (!debate) {
console.log("No debate.json found — nothing to prune.");
return;
Expand Down Expand Up @@ -197,4 +192,4 @@ export function registerEvidencePrune(cli: CAC): void {
process.exitCode = 1;
}
});
}
}
62 changes: 61 additions & 1 deletion src/cli/commands/internal/rule-discovery.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { join } from "node:path";
import { tmpdir } from "node:os";
import { rm } from "node:fs/promises";

import { filterDiscoveryEvidence, readDecision } from "./rule-discovery.js";
import { filterDiscoveryEvidence, readDecision, collectGapEvidence } from "./rule-discovery.js";

describe("filterDiscoveryEvidence", () => {
it("returns empty array when no matching evidence exists", () => {
Expand Down Expand Up @@ -118,3 +118,63 @@ describe("readDecision", () => {
expect(readDecision(runDir)).toBeNull();
});
});

describe("collectGapEvidence", () => {
  let runDir: string;

  // Write the given gap list as `{ gaps: [...] }` into the run directory's gaps.json.
  const writeGaps = (gaps: readonly Record<string, unknown>[]): void => {
    writeFileSync(join(runDir, "gaps.json"), JSON.stringify({ gaps }));
  };

  // Each test gets a fresh temporary run directory, removed afterwards.
  beforeEach(() => {
    runDir = mkdtempSync(join(tmpdir(), "gap-evidence-test-"));
  });

  afterEach(async () => {
    await rm(runDir, { recursive: true, force: true });
  });

  it("extracts uncovered actionable gaps", () => {
    writeGaps([
      { category: "spacing", description: "padding off", actionable: true, coveredByRule: null },
      { category: "color", description: "wrong shade", actionable: true, coveredByRule: null },
      { category: "rendering", description: "font fallback", actionable: false },
      { category: "layout", description: "flex gap", actionable: true, coveredByRule: "no-auto-layout" },
    ]);

    const entries = collectGapEvidence(runDir, "test-fixture");

    // Only the two actionable gaps with coveredByRule: null are expected, in input order.
    expect(entries).toHaveLength(2);
    const first = entries[0]!;
    const second = entries[1]!;
    expect(first.category).toBe("spacing");
    expect(first.source).toBe("gap-analysis");
    expect(first.fixture).toBe("test-fixture");
    expect(second.category).toBe("color");
  });

  it("returns empty for no gaps.json", () => {
    const entries = collectGapEvidence(runDir, "fx");
    expect(entries).toHaveLength(0);
  });

  it("returns empty when all gaps are covered or non-actionable", () => {
    writeGaps([
      { category: "spacing", description: "x", actionable: false },
      { category: "color", description: "y", actionable: true, coveredByRule: "raw-value" },
    ]);

    const entries = collectGapEvidence(runDir, "fx");
    expect(entries).toHaveLength(0);
  });

  it("skips actionable gap when coveredByRule is empty string", () => {
    writeGaps([
      { category: "spacing", description: "x", actionable: true, coveredByRule: "" },
    ]);

    const entries = collectGapEvidence(runDir, "fx");
    expect(entries).toHaveLength(0);
  });

  it("returns empty for malformed gaps.json", () => {
    // Written directly (not via writeGaps) because the content is deliberately not JSON.
    writeFileSync(join(runDir, "gaps.json"), "not json");

    const entries = collectGapEvidence(runDir, "fx");
    expect(entries).toHaveLength(0);
  });
});
Loading
Loading