/**
 * tools/workflow-engine/closed-loop.test.ts
 *
 * B-0914.2 — invariant tests for closed-loop orchestrator.
 */

import { describe, expect, it } from "bun:test";
import {
  DEFAULT_LOOP_CONFIG,
  runCycle,
  runLoop,
  type CiVerdict,
  type Hypothesis,
  type LoopCallbacks,
} from "./closed-loop";

/** Payload type carried by every hypothesis in these tests. */
interface SubstrateT extends Record<string, unknown> {
  payload: string;
}

/** Builds a root hypothesis (no ancestry, no composition links). */
const hypothesis = (id: string, payload: string, cycle = 0): Hypothesis<SubstrateT> => ({
  id,
  substrate: { payload },
  cycleIndex: cycle,
  derivedFrom: [],
  composesWith: [],
});

// Test callbacks — caller-injected per asymmetric-authorship
const passingCi = async (_h: Hypothesis<SubstrateT>): Promise<CiVerdict> => ({ kind: "passed" });
const failingCi = async (_h: Hypothesis<SubstrateT>): Promise<CiVerdict> => ({ kind: "failed", reason: "test" });
// Verdict is chosen from the id suffix: "-good" passes, "-revise" needs revision,
// anything else (including evolved ids) fails.
const mixedCi = async (h: Hypothesis<SubstrateT>): Promise<CiVerdict> => {
  if (h.id.endsWith("-good")) return { kind: "passed" };
  if (h.id.endsWith("-revise")) return { kind: "needs-revision", suggestions: ["fix x"] };
  return { kind: "failed", reason: "bad" };
};

// Identity rank (passes through; real impl uses TrueSkill)
const identityRank = async (
  hs: ReadonlyArray<Hypothesis<SubstrateT>>,
): Promise<ReadonlyArray<Hypothesis<SubstrateT>>> => hs;

// Mock evolution: produce single refined variant from top-N
const mockEvolve = async (
  ranked: ReadonlyArray<Hypothesis<SubstrateT>>,
  cycle: number,
): Promise<ReadonlyArray<Hypothesis<SubstrateT>>> => {
  if (ranked.length === 0) return [];
  return [
    {
      id: `evolved-cycle-${cycle}`,
      substrate: { payload: `evolved-${ranked.map(h => h.id).join("+")}` },
      cycleIndex: cycle,
      derivedFrom: ranked.map(h => h.id),
      composesWith: [],
    },
  ];
};

describe("B-0914.2 closed-loop orchestrator", () => {
  it("runCycle with empty hypotheses returns EmptyHypothesisSet", async () => {
    const callbacks: LoopCallbacks<SubstrateT> = {
      dispatchCi: passingCi,
      rankSurvivors: identityRank,
      evolveSurvivors: mockEvolve,
    };
    const result = await runCycle([], callbacks, 0);
    expect(result.ok).toBe(false);
    if (result.ok) return;
    expect(result.feedback.kind).toBe("EmptyHypothesisSet");
  });

  it("runCycle propagates passed hypotheses through ranking + evolution", async () => {
    const hs = [hypothesis("h1", "alpha"), hypothesis("h2", "beta")];
    const callbacks: LoopCallbacks<SubstrateT> = {
      dispatchCi: passingCi,
      rankSurvivors: identityRank,
      evolveSurvivors: mockEvolve,
    };
    const result = await runCycle(hs, callbacks, 0);
    expect(result.ok).toBe(true);
    if (!result.ok) return;
    expect(result.refined.length).toBe(1);
    expect(result.refined[0]!.id).toBe("evolved-cycle-1");
    expect(result.cycleIndex).toBe(1);
  });

  it("runCycle excludes failed hypotheses from propagation", async () => {
    const hs = [hypothesis("h1-good", "alpha"), hypothesis("h2-bad", "beta")];
    let rankedCount = 0;
    const callbacks: LoopCallbacks<SubstrateT> = {
      dispatchCi: mixedCi,
      rankSurvivors: async (verified) => {
        rankedCount = verified.length;
        return verified;
      },
      evolveSurvivors: mockEvolve,
    };
    const result = await runCycle(hs, callbacks, 0);
    expect(result.ok).toBe(true);
    expect(rankedCount).toBe(1); // only h1-good propagated
  });

  it("runCycle includes needs-revision with non-empty suggestions", async () => {
    const hs = [hypothesis("h1-good", "alpha"), hypothesis("h2-revise", "beta"), hypothesis("h3-bad", "gamma")];
    let rankedCount = 0;
    const callbacks: LoopCallbacks<SubstrateT> = {
      dispatchCi: mixedCi,
      rankSurvivors: async (verified) => {
        rankedCount = verified.length;
        return verified;
      },
      evolveSurvivors: mockEvolve,
    };
    await runCycle(hs, callbacks, 0);
    expect(rankedCount).toBe(2); // good + revise both propagate; bad excluded
  });

  it("runCycle returns InsufficientPropagatable when propagatable below minimum", async () => {
    const hs = [hypothesis("h1", "alpha")];
    const callbacks: LoopCallbacks<SubstrateT> = {
      dispatchCi: failingCi, // all fail
      rankSurvivors: identityRank,
      evolveSurvivors: mockEvolve,
    };
    const result = await runCycle(hs, callbacks, 0);
    expect(result.ok).toBe(false);
    if (result.ok) return;
    expect(result.feedback.kind).toBe("InsufficientPropagatable");
  });

  it("runCycle returns CiDispatchFailure on CI exception", async () => {
    const hs = [hypothesis("h1", "alpha")];
    const callbacks: LoopCallbacks<SubstrateT> = {
      dispatchCi: async () => { throw new Error("ci broken"); },
      rankSurvivors: identityRank,
      evolveSurvivors: mockEvolve,
    };
    const result = await runCycle(hs, callbacks, 0);
    expect(result.ok).toBe(false);
    if (result.ok) return;
    expect(result.feedback.kind).toBe("CiDispatchFailure");
  });

  it("runCycle returns RankingFailure on ranking exception", async () => {
    const hs = [hypothesis("h1", "alpha")];
    const callbacks: LoopCallbacks<SubstrateT> = {
      dispatchCi: passingCi,
      rankSurvivors: async () => { throw new Error("rank broken"); },
      evolveSurvivors: mockEvolve,
    };
    const result = await runCycle(hs, callbacks, 0);
    expect(result.ok).toBe(false);
    if (result.ok) return;
    expect(result.feedback.kind).toBe("RankingFailure");
  });

  it("runCycle returns EvolutionFailure on evolution exception", async () => {
    const hs = [hypothesis("h1", "alpha")];
    const callbacks: LoopCallbacks<SubstrateT> = {
      dispatchCi: passingCi,
      rankSurvivors: identityRank,
      evolveSurvivors: async () => { throw new Error("evolve broken"); },
    };
    const result = await runCycle(hs, callbacks, 0);
    expect(result.ok).toBe(false);
    if (result.ok) return;
    expect(result.feedback.kind).toBe("EvolutionFailure");
  });

  it("infrastructure-error verdicts are excluded from propagation (don't reflect hypothesis quality)", async () => {
    const hs = [hypothesis("h1", "alpha"), hypothesis("h2", "beta")];
    let rankedCount = 0;
    const callbacks: LoopCallbacks<SubstrateT> = {
      dispatchCi: async (_h) => ({ kind: "infrastructure-error", reason: "blocked-on-runnability" }),
      rankSurvivors: async (v) => { rankedCount = v.length; return v; },
      evolveSurvivors: mockEvolve,
    };
    const result = await runCycle(hs, callbacks, 0);
    expect(result.ok).toBe(false); // no propagatable
    expect(rankedCount).toBe(0); // ranking never called with empty
  });

  it("runLoop iterates until max-cycles", async () => {
    const hs = [hypothesis("h0", "init")];
    const callbacks: LoopCallbacks<SubstrateT> = {
      dispatchCi: passingCi,
      rankSurvivors: identityRank,
      evolveSurvivors: mockEvolve,
    };
    const termination = await runLoop(hs, callbacks, { ...DEFAULT_LOOP_CONFIG, maxCycles: 3 });
    expect(termination.terminatedAtCycle).toBe(3);
    expect(termination.reason).toBe("max-cycles");
  });

  it("runLoop terminates early via predicate", async () => {
    const hs = [hypothesis("h0", "init")];
    const callbacks: LoopCallbacks<SubstrateT> = {
      dispatchCi: passingCi,
      rankSurvivors: identityRank,
      evolveSurvivors: mockEvolve,
    };
    const termination = await runLoop(
      hs,
      callbacks,
      DEFAULT_LOOP_CONFIG,
      (cycleIndex, _current) => cycleIndex < 2, // stop at cycle 2
    );
    expect(termination.terminatedAtCycle).toBe(2);
    expect(termination.reason).toBe("predicate-stopped");
  });

  it("runLoop terminates on insufficient-propagatable", async () => {
    const hs = [hypothesis("h0-bad", "init")];
    const callbacks: LoopCallbacks<SubstrateT> = {
      dispatchCi: mixedCi, // h0-bad → failed
      rankSurvivors: identityRank,
      evolveSurvivors: mockEvolve,
    };
    const termination = await runLoop(hs, callbacks);
    expect(termination.reason).toBe("insufficient-propagatable");
    expect(termination.terminatedAtCycle).toBe(0);
  });

  it("runLoop terminates on error", async () => {
    const hs = [hypothesis("h0", "init")];
    const callbacks: LoopCallbacks<SubstrateT> = {
      dispatchCi: async () => { throw new Error("broken"); },
      rankSurvivors: identityRank,
      evolveSurvivors: mockEvolve,
    };
    const termination = await runLoop(hs, callbacks);
    expect(termination.reason).toBe("error");
    expect(termination.feedback?.kind).toBe("CiDispatchFailure");
  });

  it("LoopFeedback exhaustive switch (compile-time check)", () => {
    // Derive the feedback type from runCycle's own return type. Extract selects the
    // failure member of the result union first; a bare `Union extends { ok: false; ... }`
    // conditional is not distributive here and would collapse to `never`.
    type Feedback = Extract<Awaited<ReturnType<typeof runCycle<SubstrateT>>>, { ok: false }>["feedback"];
    const assertNever = (x: never): never => { throw new Error(`unhandled LoopFeedback: ${JSON.stringify(x)}`); };
    const acknowledge = (f: Feedback): string => {
      switch (f.kind) {
        case "EmptyHypothesisSet":
        case "CiDispatchFailure":
        case "RankingFailure":
        case "EvolutionFailure":
        case "InsufficientPropagatable":
        case "MaxCyclesReached":
          return f.kind;
        default:
          return assertNever(f);
      }
    };
    expect(acknowledge({ kind: "EmptyHypothesisSet" })).toBe("EmptyHypothesisSet");
    expect(acknowledge({ kind: "CiDispatchFailure", hypothesisId: "x", reason: "y" })).toBe("CiDispatchFailure");
    expect(acknowledge({ kind: "InsufficientPropagatable", propagatableCount: 0, minRequired: 1, cycleIndex: 0 })).toBe("InsufficientPropagatable");
  });

  it("CiVerdict exhaustive switch (compile-time check)", () => {
    const assertNever = (x: never): never => { throw new Error(`unhandled CiVerdict: ${JSON.stringify(x)}`); };
    const acknowledge = (v: CiVerdict): string => {
      switch (v.kind) {
        case "passed":
        case "failed":
        case "needs-revision":
        case "infrastructure-error":
          return v.kind;
        default:
          return assertNever(v);
      }
    };
    expect(acknowledge({ kind: "passed" })).toBe("passed");
    expect(acknowledge({ kind: "failed", reason: "x" })).toBe("failed");
    expect(acknowledge({ kind: "needs-revision", suggestions: [] })).toBe("needs-revision");
    expect(acknowledge({ kind: "infrastructure-error", reason: "x" })).toBe("infrastructure-error");
  });

  it("integration test: full closed-loop with realistic callback wiring", async () => {
    const hs = [
      hypothesis("h1-good", "alpha"),
      hypothesis("h2-good", "beta"),
      hypothesis("h3-bad", "gamma"),
    ];
    const callbacks: LoopCallbacks<SubstrateT> = {
      dispatchCi: mixedCi,
      rankSurvivors: identityRank,
      evolveSurvivors: mockEvolve,
    };
    const termination = await runLoop(hs, callbacks, { ...DEFAULT_LOOP_CONFIG, maxCycles: 2 });
    // Cycle 0: h3-bad fails, h1-good + h2-good propagate, evolve to 1 variant
    // ("evolved-cycle-1") via mockEvolve.
    // Cycle 1: mixedCi falls through to "failed" for "evolved-cycle-*" ids
    // (no -good/-bad/-revise suffix), so propagatable.length = 0 < minPropagatable=1.
    // Terminates deterministically as insufficient-propagatable at cycle 1.
    expect(termination.terminatedAtCycle).toBe(1);
    expect(termination.reason).toBe("insufficient-propagatable");
    expect(termination.feedback?.kind).toBe("InsufficientPropagatable");
  });
});

/**
 * tools/workflow-engine/closed-loop.ts
 *
 * B-0914.2 — closed-loop CI-result → next-hypothesis dispatch
 * orchestrator. Pure-TS substrate that composes:
 *   - TrueSkill ranking (B-0914.1 PR #5764)
 *   - Evolution mash-refine (B-0914.5 PR #5767)
 *   - Pairing tracker (B-0914.4 PR #5768)
 *   - CI-result dispatch (via callbacks; integrates with B-0891 zflash
 *     test-harness substrate when wired by caller)
 *
 * Per human maintainer 2026-05-28 'S M L all please in that order lol' — L
 * (large scope) in the substrate-engineering ship-sequence. Wire-up that
 * turns the tournament-loop substrate into a live closed-loop iteration
 * system.
 *
 * Design: pure loop-orchestration substrate with INJECTABLE callbacks
 * for substrate-specific operations (ranking / evolution / verification).
 * Caller provides the functions; orchestrator handles loop structure +
 * propagation discipline.
 */
/*
 * This separation-of-concerns means the
 * orchestrator does NOT tightly couple to specific TrueSkill / evolution
 * / pairing module implementations — it composes with ANY substrate that
 * implements the callback contracts.
 *
 * Source: Sakana Robin closed-loop (Crow + Falcon + Finch with raw-data
 * analysis feeding back to new hypothesis generation; Nature 2026
 * s41586-026-10652-y).
 *
 * Composes with:
 *   - B-0914.2 backlog row (closed-loop dispatch extension target)
 *   - B-0914.1 PR #5764 TrueSkill substrate (caller provides ranking fn)
 *   - B-0914.4 PR #5768 pairing tracker substrate (caller provides
 *     verification fn + pairing state)
 *   - B-0914.5 PR #5767 evolution substrate (caller provides evolution fn)
 *   - B-0891 zflash test-harness substrate (caller can wire CI dispatch
 *     to actual test runners per determineRunnability discriminator)
 *   - B-0867 workflow engine substrate
 *   - .claude/rules/monad-propagation-pattern-cross-language-substrate-shape.md
 *     (Result)
 *   - .claude/rules/asymmetric-authorship-substrate-entity-defines-consent-channel-recipient-acknowledges.md
 *     (each callback authors own TFeedback)
 *
 * PoC scope: pure orchestration logic with injectable callbacks. Real
 * CI integration (via tools/ci/ + B-0891) handled by caller wiring.
 */

/**
 * Hypothesis — generic substrate item flowing through the tournament loop.
 *
 * @typeParam T - Caller-defined payload stored in `substrate`. The
 *   orchestrator in this module never reads it; it is only handed to callbacks.
 */
export interface Hypothesis<T> {
  readonly id: string;
  readonly substrate: T;
  /** Which loop iteration generated this hypothesis. */
  readonly cycleIndex: number;
  /** Ancestry chain. */
  readonly derivedFrom: ReadonlyArray<string>;
  readonly composesWith: ReadonlyArray<string>;
}

/**
 * CI verdict — outcome of dispatching a hypothesis to CI/test runner.
 *
 * Per asymmetric-authorship: CI-substrate-entity authors its own
 * feedback channel; orchestrator acknowledges via dispatch.
 */
export type CiVerdict =
  | { kind: "passed"; notes?: string }
  | { kind: "failed"; reason: string }
  | { kind: "needs-revision"; suggestions: ReadonlyArray<string> }
  | { kind: "infrastructure-error"; reason: string }; // blocked-on-runnability

/**
 * Closed-loop feedback per monad-propagation rule.
 */
export type LoopFeedback =
  | { kind: "EmptyHypothesisSet" }
  | { kind: "CiDispatchFailure"; hypothesisId: string; reason: string }
  | { kind: "RankingFailure"; reason: string }
  | { kind: "EvolutionFailure"; reason: string }
  | { kind: "InsufficientPropagatable"; propagatableCount: number; minRequired: number; cycleIndex: number }
  | { kind: "MaxCyclesReached"; cyclesCompleted: number };

/**
 * Result-shape per monad-propagation rule.
 *
 * On success `cycleIndex` is the index of the NEXT cycle (input index + 1),
 * i.e. the generation the `refined` hypotheses belong to.
 */
export type LoopResult<T> =
  | { ok: true; refined: ReadonlyArray<Hypothesis<T>>; cycleIndex: number }
  | { ok: false; feedback: LoopFeedback };

/**
 * Closed-loop callbacks — substrate-entity-injected functions per
 * asymmetric-authorship discipline (each callback's substrate-entity
 * authors its own feedback channel).
 *
 * A callback that throws (or rejects) is converted by `runCycle` into the
 * matching `LoopFeedback` variant rather than propagating the exception.
 */
export interface LoopCallbacks<T> {
  /**
   * Dispatch a hypothesis to CI substrate (e.g. tools/ci/qemu-full-install-test.ts
   * per B-0891 zflash). Returns verdict that determines pairing-tracker recording.
   */
  readonly dispatchCi: (h: Hypothesis<T>) => Promise<CiVerdict>;

  /**
   * Rank verified hypotheses via TrueSkill (or compatible substrate).
   * Returns hypotheses sorted descending by conservativeSkill.
   * Per B-0914.1 PR #5764 — caller wires rate1v1 + conservativeSkill.
   */
  readonly rankSurvivors: (
    verified: ReadonlyArray<Hypothesis<T>>,
  ) => Promise<ReadonlyArray<Hypothesis<T>>>;

  /**
   * Evolve top-N ranked survivors into refined variants.
   * Per B-0914.5 PR #5767 — caller wires evolveTopN.
   */
  readonly evolveSurvivors: (
    ranked: ReadonlyArray<Hypothesis<T>>,
    cycleIndex: number,
  ) => Promise<ReadonlyArray<Hypothesis<T>>>;
}

/**
 * Closed-loop configuration.
 */
export interface LoopConfig {
  /** Bounded iteration; safety bound. */
  readonly maxCycles: number;
  /** How many survivors per cycle to evolve. */
  readonly topNToEvolve: number;
  /** Minimum survivors required to continue (else terminate). */
  readonly minPropagatable: number;
}

export const DEFAULT_LOOP_CONFIG: LoopConfig = {
  maxCycles: 10,
  topNToEvolve: 3,
  minPropagatable: 1,
};

/** Renders a caught value as a feedback reason: `Error.message` when available, else `String(err)`. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Decides whether a CI verdict lets its hypothesis flow on to ranking.
 *
 * Only `passed` and `needs-revision` with at least one suggestion propagate.
 * `failed` is excluded per the pairing-tracker propagatableEmissionIds rule;
 * `infrastructure-error` is excluded because infrastructure failures don't
 * reflect hypothesis quality.
 */
function isPropagatable(verdict: CiVerdict): boolean {
  switch (verdict.kind) {
    case "passed":
      return true;
    case "needs-revision":
      return verdict.suggestions.length > 0;
    case "failed":
    case "infrastructure-error":
      return false;
    default: {
      // Compile-time exhaustiveness guard: adding a CiVerdict variant without
      // handling it above becomes a type error here. At runtime an unknown
      // kind is still simply excluded, never thrown.
      const unhandled: never = verdict;
      void unhandled;
      return false;
    }
  }
}

/**
 * Run a single closed-loop iteration cycle.
 *
 * Cycle steps:
 *   1. Dispatch each hypothesis to CI (sequentially, in input order)
 *   2. Collect verdicts
 *   3. Filter to verified + needs-revision-with-suggestions (propagatable)
 *   4. Rank via TrueSkill (caller-injected)
 *   5. Evolve top-N (caller-injected)
 *   6. Return refined variants for next cycle
 *
 * Per `.claude/rules/holding-without-named-dependency-is-standing-by-failure.md`:
 * the loop is genuinely-active substrate work; not standing-by-empty.
 *
 * @param hypotheses - Candidates for this cycle; empty input yields `EmptyHypothesisSet`.
 * @param callbacks - Injected CI / ranking / evolution functions.
 * @param cycleIndex - Index of this cycle; evolution is invoked with `cycleIndex + 1`.
 * @param config - Uses `topNToEvolve` and `minPropagatable`; defaults to `DEFAULT_LOOP_CONFIG`.
 * @returns `ok: true` with the refined variants and the next cycle index, or
 *   `ok: false` with feedback: the first CI dispatch that throws aborts the cycle
 *   with `CiDispatchFailure`; too few survivors gives `InsufficientPropagatable`
 *   (ranking is not called); a throwing rank/evolve callback gives
 *   `RankingFailure` / `EvolutionFailure`.
 */
export async function runCycle<T>(
  hypotheses: ReadonlyArray<Hypothesis<T>>,
  callbacks: LoopCallbacks<T>,
  cycleIndex: number,
  config: LoopConfig = DEFAULT_LOOP_CONFIG,
): Promise<LoopResult<T>> {
  if (hypotheses.length === 0) {
    return { ok: false, feedback: { kind: "EmptyHypothesisSet" } };
  }

  // Step 1+2: dispatch to CI + collect verdicts
  const verdicts: Array<{ hypothesis: Hypothesis<T>; verdict: CiVerdict }> = [];
  for (const h of hypotheses) {
    try {
      const verdict = await callbacks.dispatchCi(h);
      verdicts.push({ hypothesis: h, verdict });
    } catch (err) {
      return {
        ok: false,
        feedback: { kind: "CiDispatchFailure", hypothesisId: h.id, reason: describeError(err) },
      };
    }
  }

  // Step 3: filter to propagatable (passed + needs-revision-with-suggestions)
  const propagatable: Hypothesis<T>[] = verdicts
    .filter(({ verdict }) => isPropagatable(verdict))
    .map(({ hypothesis }) => hypothesis);

  if (propagatable.length < config.minPropagatable) {
    return {
      ok: false,
      feedback: {
        kind: "InsufficientPropagatable",
        propagatableCount: propagatable.length,
        minRequired: config.minPropagatable,
        cycleIndex,
      },
    };
  }

  // Step 4: rank via caller-injected TrueSkill
  let ranked: ReadonlyArray<Hypothesis<T>>;
  try {
    ranked = await callbacks.rankSurvivors(propagatable);
  } catch (err) {
    return { ok: false, feedback: { kind: "RankingFailure", reason: describeError(err) } };
  }

  // Step 5: evolve top-N via caller-injected evolution
  const topN = ranked.slice(0, config.topNToEvolve);
  let refined: ReadonlyArray<Hypothesis<T>>;
  try {
    refined = await callbacks.evolveSurvivors(topN, cycleIndex + 1);
  } catch (err) {
    return { ok: false, feedback: { kind: "EvolutionFailure", reason: describeError(err) } };
  }

  return {
    ok: true,
    refined,
    cycleIndex: cycleIndex + 1,
  };
}

/**
 * Outcome of `runLoop`: where the loop stopped, why, and what it was holding.
 */
export interface LoopTermination<T> {
  /** Index of the cycle that was about to run (or was running) when the loop stopped. */
  readonly terminatedAtCycle: number;
  readonly reason: "max-cycles" | "insufficient-propagatable" | "predicate-stopped" | "error";
  /** The hypothesis set that was current at termination (input to the cycle that did not complete). */
  readonly finalHypotheses: ReadonlyArray<Hypothesis<T>>;
  /**
   * Feedback describing the stop: the failing cycle's feedback for
   * "insufficient-propagatable" / "error", `MaxCyclesReached` for "max-cycles",
   * and absent for "predicate-stopped".
   */
  readonly feedback?: LoopFeedback;
}

/**
 * Run multiple closed-loop iteration cycles until termination
 * condition (max cycles OR propagatable drops below minimum OR
 * caller-supplied predicate returns false).
 *
 * Returns the final cycle's refined hypotheses + the cycle count completed.
 *
 * @param initialHypotheses - Seed set for cycle 0.
 * @param callbacks - Injected CI / ranking / evolution functions, passed to every cycle.
 * @param config - Loop bounds; defaults to `DEFAULT_LOOP_CONFIG`.
 * @param shouldContinue - Optional predicate checked BEFORE each cycle with the
 *   upcoming cycle index and current set; returning false stops the loop.
 * @returns A `LoopTermination`. Any non-ok cycle other than
 *   `InsufficientPropagatable` is reported with reason "error"; this includes
 *   `EmptyHypothesisSet` when evolution returned no variants.
 */
export async function runLoop<T>(
  initialHypotheses: ReadonlyArray<Hypothesis<T>>,
  callbacks: LoopCallbacks<T>,
  config: LoopConfig = DEFAULT_LOOP_CONFIG,
  shouldContinue?: (cycleIndex: number, current: ReadonlyArray<Hypothesis<T>>) => boolean,
): Promise<LoopTermination<T>> {
  let current = initialHypotheses;
  let cycleIndex = 0;

  while (cycleIndex < config.maxCycles) {
    if (shouldContinue && !shouldContinue(cycleIndex, current)) {
      return {
        terminatedAtCycle: cycleIndex,
        reason: "predicate-stopped",
        finalHypotheses: current,
      };
    }

    const result = await runCycle(current, callbacks, cycleIndex, config);
    if (result.ok === false) {
      return {
        terminatedAtCycle: cycleIndex,
        reason: result.feedback.kind === "InsufficientPropagatable" ? "insufficient-propagatable" : "error",
        finalHypotheses: current,
        feedback: result.feedback,
      };
    }
    current = result.refined;
    cycleIndex = result.cycleIndex;
  }

  // Surface the otherwise-unused MaxCyclesReached variant so every
  // non-predicate termination carries structured feedback.
  return {
    terminatedAtCycle: cycleIndex,
    reason: "max-cycles",
    finalHypotheses: current,
    feedback: { kind: "MaxCyclesReached", cyclesCompleted: cycleIndex },
  };
}