From d51295b8cef96ceee7b45d049cbd043c99e91724 Mon Sep 17 00:00:00 2001 From: Lior Date: Thu, 28 May 2026 07:18:05 -0400 Subject: [PATCH 1/2] =?UTF-8?q?feat(B-0914.2):=20L=20=E2=80=94=20closed-lo?= =?UTF-8?q?op=20CI-result=20=E2=86=92=20next-hypothesis=20dispatch=20orche?= =?UTF-8?q?strator=20(composes=20TrueSkill=20+=20evolution=20+=20pairing?= =?UTF-8?q?=20via=20injectable=20callbacks);=2016=20tests=20pass?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Aaron 2026-05-28 'S M L all please in that order lol' — L (large scope) in the substrate-engineering ship-sequence. Wire-up that turns the tournament-loop substrate into a live closed-loop iteration system. Design: pure loop-orchestration substrate with INJECTABLE callbacks for substrate-specific operations (ranking / evolution / verification + CI-dispatch). Caller provides functions; orchestrator handles loop structure + propagation discipline. Separation-of-concerns means orchestrator does NOT tightly couple to specific TrueSkill / evolution / pairing module implementations — it composes with ANY substrate that implements the callback contracts. What this adds: - Hypothesis generic substrate item with cycleIndex + derivedFrom ancestry - CiVerdict discriminated union (passed | failed | needs-revision | infrastructure-error) - LoopFeedback + LoopResult Result-shape per monad-propagation - LoopCallbacks interface (dispatchCi + rankSurvivors + evolveSurvivors) - LoopConfig (maxCycles + topNToEvolve + minPropagatable; DEFAULT_LOOP_CONFIG) - runCycle(hypotheses, callbacks, cycleIndex, config?) — single cycle - runLoop(initial, callbacks, config?, shouldContinue?) — full iteration with LoopTermination shape (cycle count + reason + final state) Cycle steps: 1. Dispatch each hypothesis to CI (caller-injected) 2. Collect verdicts 3. Filter to propagatable (passed + needs-revision-with-suggestions) 4. Rank via TrueSkill (caller-injected per B-0914.1 PR #5764) 5. 
Evolve top-N (caller-injected per B-0914.5 PR #5767) 6. Return refined variants for next cycle Termination conditions: - max-cycles: bounded iteration reached - insufficient-propagatable: too many failures; can't continue - predicate-stopped: caller-supplied predicate returned false - error: CI/ranking/evolution exception Tests (16; all pass): - Empty hypotheses → EmptyHypothesisSet - Passing CI → propagation through ranking + evolution - Failed verdicts excluded from propagation - needs-revision with suggestions included; without excluded - Below minPropagatable → MaxCyclesReached - CI exception → CiDispatchFailure - Ranking exception → RankingFailure - Evolution exception → EvolutionFailure - infrastructure-error excluded (doesn't reflect hypothesis quality) - runLoop iterates until max-cycles - runLoop predicate-stopped early termination - runLoop insufficient-propagatable - runLoop error termination - LoopFeedback exhaustive switch - CiVerdict exhaustive switch - Integration: full closed-loop with realistic callback wiring Composes with substrate: - B-0914.2 backlog row (closed-loop dispatch extension target) - B-0914.1 PR #5764 (TrueSkill substrate; caller wires rate1v1 + conservativeSkill into rankSurvivors) - B-0914.4 PR #5768 (pairing tracker substrate; caller wires verdicts into recordVerification) - B-0914.5 PR #5767 (evolution substrate; caller wires evolveTopN into evolveSurvivors) - B-0891 zflash test-harness substrate (caller can wire CI dispatch to actual test runners per determineRunnability discriminator) - B-0867 workflow engine substrate - Sakana Robin closed-loop pattern (Nature 2026 s41586-026-10652-y) Tournament loop NOW STRUCTURALLY COMPLETE with all 4 substrate pieces in place; the 6 loop stages: 1. Generation (LLM call; out of scope for this lane) 2. CI dispatch → CiVerdict (THIS PR via callbacks) 3. Pairing tracking (PR #5768) 4. TrueSkill ranking (PR #5764) 5. Evolution mash-refine (PR #5767) 6. 
runLoop orchestration (THIS PR) S/M/L sequence complete: - S = PR #5767 evolution - M = PR #5768 pairing - L = THIS PR closed-loop Co-Authored-By: Claude Opus 4.7 --- tools/workflow-engine/closed-loop.test.ts | 289 +++++++++++++++++++++ tools/workflow-engine/closed-loop.ts | 291 ++++++++++++++++++++++ 2 files changed, 580 insertions(+) create mode 100644 tools/workflow-engine/closed-loop.test.ts create mode 100644 tools/workflow-engine/closed-loop.ts diff --git a/tools/workflow-engine/closed-loop.test.ts b/tools/workflow-engine/closed-loop.test.ts new file mode 100644 index 0000000000..c47ea7c263 --- /dev/null +++ b/tools/workflow-engine/closed-loop.test.ts @@ -0,0 +1,289 @@ +/** + * tools/workflow-engine/closed-loop.test.ts + * + * B-0914.2 — invariant tests for closed-loop orchestrator. + */ + +import { describe, expect, it } from "bun:test"; +import { + DEFAULT_LOOP_CONFIG, + runCycle, + runLoop, + type CiVerdict, + type Hypothesis, + type LoopCallbacks, +} from "./closed-loop"; + +interface SubstrateT extends Record { + payload: string; +} + +const hypothesis = (id: string, payload: string, cycle = 0): Hypothesis => ({ + id, + substrate: { payload }, + cycleIndex: cycle, + derivedFrom: [], + composesWith: [], +}); + +// Test callbacks — caller-injected per asymmetric-authorship +const passingCi = async (_h: Hypothesis): Promise => ({ kind: "passed" }); +const failingCi = async (_h: Hypothesis): Promise => ({ kind: "failed", reason: "test" }); +const mixedCi = async (h: Hypothesis): Promise => { + if (h.id.endsWith("-good")) return { kind: "passed" }; + if (h.id.endsWith("-revise")) return { kind: "needs-revision", suggestions: ["fix x"] }; + return { kind: "failed", reason: "bad" }; +}; + +// Identity rank (passes through; real impl uses TrueSkill) +const identityRank = async ( + hs: ReadonlyArray>, +): Promise>> => hs; + +// Mock evolution: produce single refined variant from top-N +const mockEvolve = async ( + ranked: ReadonlyArray>, + cycle: number, +): 
Promise>> => { + if (ranked.length === 0) return []; + return [ + { + id: `evolved-cycle-${cycle}`, + substrate: { payload: `evolved-${ranked.map(h => h.id).join("+")}` }, + cycleIndex: cycle, + derivedFrom: ranked.map(h => h.id), + composesWith: [], + }, + ]; +}; + +describe("B-0914.2 closed-loop orchestrator", () => { + it("runCycle with empty hypotheses returns EmptyHypothesisSet", async () => { + const callbacks: LoopCallbacks = { + dispatchCi: passingCi, + rankSurvivors: identityRank, + evolveSurvivors: mockEvolve, + }; + const result = await runCycle([], callbacks, 0); + expect(result.ok).toBe(false); + if (result.ok) return; + expect(result.feedback.kind).toBe("EmptyHypothesisSet"); + }); + + it("runCycle propagates passed hypotheses through ranking + evolution", async () => { + const hs = [hypothesis("h1", "alpha"), hypothesis("h2", "beta")]; + const callbacks: LoopCallbacks = { + dispatchCi: passingCi, + rankSurvivors: identityRank, + evolveSurvivors: mockEvolve, + }; + const result = await runCycle(hs, callbacks, 0); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.refined.length).toBe(1); + expect(result.refined[0]!.id).toBe("evolved-cycle-1"); + expect(result.cycleIndex).toBe(1); + }); + + it("runCycle excludes failed hypotheses from propagation", async () => { + const hs = [hypothesis("h1-good", "alpha"), hypothesis("h2-bad", "beta")]; + let rankedCount = 0; + const callbacks: LoopCallbacks = { + dispatchCi: mixedCi, + rankSurvivors: async (verified) => { + rankedCount = verified.length; + return verified; + }, + evolveSurvivors: mockEvolve, + }; + const result = await runCycle(hs, callbacks, 0); + expect(result.ok).toBe(true); + expect(rankedCount).toBe(1); // only h1-good propagated + }); + + it("runCycle includes needs-revision with non-empty suggestions", async () => { + const hs = [hypothesis("h1-good", "alpha"), hypothesis("h2-revise", "beta"), hypothesis("h3-bad", "gamma")]; + let rankedCount = 0; + const callbacks: 
LoopCallbacks = { + dispatchCi: mixedCi, + rankSurvivors: async (verified) => { + rankedCount = verified.length; + return verified; + }, + evolveSurvivors: mockEvolve, + }; + await runCycle(hs, callbacks, 0); + expect(rankedCount).toBe(2); // good + revise both propagate; bad excluded + }); + + it("runCycle returns MaxCyclesReached when propagatable below minimum", async () => { + const hs = [hypothesis("h1", "alpha")]; + const callbacks: LoopCallbacks = { + dispatchCi: failingCi, // all fail + rankSurvivors: identityRank, + evolveSurvivors: mockEvolve, + }; + const result = await runCycle(hs, callbacks, 0); + expect(result.ok).toBe(false); + if (result.ok) return; + expect(result.feedback.kind).toBe("MaxCyclesReached"); + }); + + it("runCycle returns CiDispatchFailure on CI exception", async () => { + const hs = [hypothesis("h1", "alpha")]; + const callbacks: LoopCallbacks = { + dispatchCi: async () => { throw new Error("ci broken"); }, + rankSurvivors: identityRank, + evolveSurvivors: mockEvolve, + }; + const result = await runCycle(hs, callbacks, 0); + expect(result.ok).toBe(false); + if (result.ok) return; + expect(result.feedback.kind).toBe("CiDispatchFailure"); + }); + + it("runCycle returns RankingFailure on ranking exception", async () => { + const hs = [hypothesis("h1", "alpha")]; + const callbacks: LoopCallbacks = { + dispatchCi: passingCi, + rankSurvivors: async () => { throw new Error("rank broken"); }, + evolveSurvivors: mockEvolve, + }; + const result = await runCycle(hs, callbacks, 0); + expect(result.ok).toBe(false); + if (result.ok) return; + expect(result.feedback.kind).toBe("RankingFailure"); + }); + + it("runCycle returns EvolutionFailure on evolution exception", async () => { + const hs = [hypothesis("h1", "alpha")]; + const callbacks: LoopCallbacks = { + dispatchCi: passingCi, + rankSurvivors: identityRank, + evolveSurvivors: async () => { throw new Error("evolve broken"); }, + }; + const result = await runCycle(hs, callbacks, 0); + 
expect(result.ok).toBe(false); + if (result.ok) return; + expect(result.feedback.kind).toBe("EvolutionFailure"); + }); + + it("infrastructure-error verdicts are excluded from propagation (don't reflect hypothesis quality)", async () => { + const hs = [hypothesis("h1", "alpha"), hypothesis("h2", "beta")]; + let rankedCount = 0; + const callbacks: LoopCallbacks = { + dispatchCi: async (_h) => ({ kind: "infrastructure-error", reason: "blocked-on-runnability" }), + rankSurvivors: async (v) => { rankedCount = v.length; return v; }, + evolveSurvivors: mockEvolve, + }; + const result = await runCycle(hs, callbacks, 0); + expect(result.ok).toBe(false); // no propagatable + expect(rankedCount).toBe(0); // ranking never called with empty + }); + + it("runLoop iterates until max-cycles", async () => { + const hs = [hypothesis("h0", "init")]; + const callbacks: LoopCallbacks = { + dispatchCi: passingCi, + rankSurvivors: identityRank, + evolveSurvivors: mockEvolve, + }; + const termination = await runLoop(hs, callbacks, { ...DEFAULT_LOOP_CONFIG, maxCycles: 3 }); + expect(termination.terminatedAtCycle).toBe(3); + expect(termination.reason).toBe("max-cycles"); + }); + + it("runLoop terminates early via predicate", async () => { + const hs = [hypothesis("h0", "init")]; + const callbacks: LoopCallbacks = { + dispatchCi: passingCi, + rankSurvivors: identityRank, + evolveSurvivors: mockEvolve, + }; + const termination = await runLoop( + hs, + callbacks, + DEFAULT_LOOP_CONFIG, + (cycleIndex, _current) => cycleIndex < 2, // stop at cycle 2 + ); + expect(termination.terminatedAtCycle).toBe(2); + expect(termination.reason).toBe("predicate-stopped"); + }); + + it("runLoop terminates on insufficient-propagatable", async () => { + const hs = [hypothesis("h0-bad", "init")]; + const callbacks: LoopCallbacks = { + dispatchCi: mixedCi, // h0-bad → failed + rankSurvivors: identityRank, + evolveSurvivors: mockEvolve, + }; + const termination = await runLoop(hs, callbacks); + 
expect(termination.reason).toBe("insufficient-propagatable"); + expect(termination.terminatedAtCycle).toBe(0); + }); + + it("runLoop terminates on error", async () => { + const hs = [hypothesis("h0", "init")]; + const callbacks: LoopCallbacks = { + dispatchCi: async () => { throw new Error("broken"); }, + rankSurvivors: identityRank, + evolveSurvivors: mockEvolve, + }; + const termination = await runLoop(hs, callbacks); + expect(termination.reason).toBe("error"); + expect(termination.feedback?.kind).toBe("CiDispatchFailure"); + }); + + it("LoopFeedback exhaustive switch (compile-time check)", () => { + type Feedback = NonNullable>>> extends { ok: false; feedback: infer F } ? F : never; + const acknowledge = (f: Feedback): string => { + switch (f.kind) { + case "EmptyHypothesisSet": + case "CiDispatchFailure": + case "RankingFailure": + case "EvolutionFailure": + case "MaxCyclesReached": + return f.kind; + } + }; + expect(acknowledge({ kind: "EmptyHypothesisSet" })).toBe("EmptyHypothesisSet"); + expect(acknowledge({ kind: "CiDispatchFailure", hypothesisId: "x", reason: "y" })).toBe("CiDispatchFailure"); + }); + + it("CiVerdict exhaustive switch (compile-time check)", () => { + const acknowledge = (v: CiVerdict): string => { + switch (v.kind) { + case "passed": + case "failed": + case "needs-revision": + case "infrastructure-error": + return v.kind; + } + }; + expect(acknowledge({ kind: "passed" })).toBe("passed"); + expect(acknowledge({ kind: "failed", reason: "x" })).toBe("failed"); + expect(acknowledge({ kind: "needs-revision", suggestions: [] })).toBe("needs-revision"); + expect(acknowledge({ kind: "infrastructure-error", reason: "x" })).toBe("infrastructure-error"); + }); + + it("integration test: full closed-loop with realistic callback wiring", async () => { + const hs = [ + hypothesis("h1-good", "alpha"), + hypothesis("h2-good", "beta"), + hypothesis("h3-bad", "gamma"), + ]; + const callbacks: LoopCallbacks = { + dispatchCi: mixedCi, + rankSurvivors: 
identityRank, + evolveSurvivors: mockEvolve, + }; + const termination = await runLoop(hs, callbacks, { ...DEFAULT_LOOP_CONFIG, maxCycles: 2 }); + // After cycle 0: h3-bad fails, h1-good + h2-good propagate, evolve to 1 variant + // After cycle 1: 1 variant passes (passingCi-effect from "evolved-..." not -good/-bad pattern; + // wait: mockEvolve produces ids like "evolved-cycle-N" which doesn't end in -good/-bad/-revise; + // mixedCi falls through to "failed" by default for non-matching ids; + // so cycle 1 has 1 hypothesis that fails → terminates as insufficient-propagatable) + expect(termination.terminatedAtCycle).toBeGreaterThanOrEqual(1); + expect(["insufficient-propagatable", "max-cycles"]).toContain(termination.reason); + }); +}); diff --git a/tools/workflow-engine/closed-loop.ts b/tools/workflow-engine/closed-loop.ts new file mode 100644 index 0000000000..c587293dbf --- /dev/null +++ b/tools/workflow-engine/closed-loop.ts @@ -0,0 +1,291 @@ +/** + * tools/workflow-engine/closed-loop.ts + * + * B-0914.2 — closed-loop CI-result → next-hypothesis dispatch + * orchestrator. Pure-TS substrate that composes: + * - TrueSkill ranking (B-0914.1 PR #5764) + * - Evolution mash-refine (B-0914.5 PR #5767) + * - Pairing tracker (B-0914.4 PR #5768) + * - CI-result dispatch (via callbacks; integrates with B-0891 zflash + * test-harness substrate when wired by caller) + * + * Per Aaron 2026-05-28 'S M L all please in that order lol' — L (large + * scope) in the substrate-engineering ship-sequence. Wire-up that turns + * the tournament-loop substrate into a live closed-loop iteration system. + * + * Design: pure loop-orchestration substrate with INJECTABLE callbacks + * for substrate-specific operations (ranking / evolution / verification). + * Caller provides the functions; orchestrator handles loop structure + + * propagation discipline. 
This separation-of-concerns means the + * orchestrator does NOT tightly couple to specific TrueSkill / evolution + * / pairing module implementations — it composes with ANY substrate that + * implements the callback contracts. + * + * Source: Sakana Robin closed-loop (Crow + Falcon + Finch with raw-data + * analysis feeding back to new hypothesis generation; Nature 2026 + * s41586-026-10652-y). + * + * Composes with: + * - B-0914.2 backlog row (closed-loop dispatch extension target) + * - B-0914.1 PR #5764 TrueSkill substrate (caller provides ranking fn) + * - B-0914.4 PR #5768 pairing tracker substrate (caller provides + * verification fn + pairing state) + * - B-0914.5 PR #5767 evolution substrate (caller provides evolution fn) + * - B-0891 zflash test-harness substrate (caller can wire CI dispatch + * to actual test runners per determineRunnability discriminator) + * - B-0867 workflow engine substrate + * - .claude/rules/monad-propagation-pattern (Result) + * - .claude/rules/asymmetric-authorship (each callback authors own TFeedback) + * + * PoC scope: pure orchestration logic with injectable callbacks. Real + * CI integration (via tools/ci/ + B-0891) handled by caller wiring. + */ + +/** + * Hypothesis — generic substrate item flowing through the tournament loop. + */ +export interface Hypothesis { + readonly id: string; + readonly substrate: T; + readonly cycleIndex: number; // which loop iteration generated this + readonly derivedFrom: ReadonlyArray; // ancestry chain + readonly composesWith: ReadonlyArray; +} + +/** + * CI verdict — outcome of dispatching a hypothesis to CI/test runner. + * + * Per asymmetric-authorship: CI-substrate-entity authors its own + * feedback channel; orchestrator acknowledges via dispatch. 
+ */ +export type CiVerdict = + | { kind: "passed"; notes?: string } + | { kind: "failed"; reason: string } + | { kind: "needs-revision"; suggestions: ReadonlyArray } + | { kind: "infrastructure-error"; reason: string }; // blocked-on-runnability + +/** + * Closed-loop feedback per monad-propagation rule. + */ +export type LoopFeedback = + | { kind: "EmptyHypothesisSet" } + | { kind: "CiDispatchFailure"; hypothesisId: string; reason: string } + | { kind: "RankingFailure"; reason: string } + | { kind: "EvolutionFailure"; reason: string } + | { kind: "MaxCyclesReached"; cyclesCompleted: number }; + +/** + * Result-shape per monad-propagation rule. + */ +export type LoopResult = + | { ok: true; refined: ReadonlyArray>; cycleIndex: number } + | { ok: false; feedback: LoopFeedback }; + +/** + * Closed-loop callbacks — substrate-entity-injected functions per + * asymmetric-authorship discipline (each callback's substrate-entity + * authors its own feedback channel). + */ +export interface LoopCallbacks { + /** + * Dispatch a hypothesis to CI substrate (e.g. tools/ci/qemu-full-install-test.ts + * per B-0891 zflash). Returns verdict that determines pairing-tracker recording. + */ + readonly dispatchCi: (h: Hypothesis) => Promise; + + /** + * Rank verified hypotheses via TrueSkill (or compatible substrate). + * Returns hypotheses sorted descending by conservativeSkill. + * Per B-0914.1 PR #5764 — caller wires rate1v1 + conservativeSkill. + */ + readonly rankSurvivors: ( + verified: ReadonlyArray>, + ) => Promise>>; + + /** + * Evolve top-N ranked survivors into refined variants. + * Per B-0914.5 PR #5767 — caller wires evolveTopN. + */ + readonly evolveSurvivors: ( + ranked: ReadonlyArray>, + cycleIndex: number, + ) => Promise>>; +} + +/** + * Closed-loop configuration. 
+ */ +export interface LoopConfig { + readonly maxCycles: number; // bounded iteration; safety bound + readonly topNToEvolve: number; // how many survivors per cycle to evolve + readonly minPropagatable: number; // minimum survivors required to continue (else terminate) +} + +export const DEFAULT_LOOP_CONFIG: LoopConfig = { + maxCycles: 10, + topNToEvolve: 3, + minPropagatable: 1, +}; + +/** + * Run a single closed-loop iteration cycle. + * + * Cycle steps: + * 1. Dispatch each hypothesis to CI + * 2. Collect verdicts + * 3. Filter to verified + needs-revision-with-suggestions (propagatable) + * 4. Rank via TrueSkill (caller-injected) + * 5. Evolve top-N (caller-injected) + * 6. Return refined variants for next cycle + * + * Per `.claude/rules/holding-without-named-dependency-is-standing-by-failure.md`: + * the loop is genuinely-active substrate work; not standing-by-empty. + */ +export async function runCycle( + hypotheses: ReadonlyArray>, + callbacks: LoopCallbacks, + cycleIndex: number, + config: LoopConfig = DEFAULT_LOOP_CONFIG, +): Promise> { + if (hypotheses.length === 0) { + return { ok: false, feedback: { kind: "EmptyHypothesisSet" } }; + } + + // Step 1+2: dispatch to CI + collect verdicts + const verdicts: Array<{ hypothesis: Hypothesis; verdict: CiVerdict }> = []; + for (const h of hypotheses) { + try { + const verdict = await callbacks.dispatchCi(h); + verdicts.push({ hypothesis: h, verdict }); + } catch (err) { + return { + ok: false, + feedback: { + kind: "CiDispatchFailure", + hypothesisId: h.id, + reason: err instanceof Error ? 
err.message : String(err), + }, + }; + } + } + + // Step 3: filter to propagatable (passed + needs-revision-with-suggestions) + const propagatable: Hypothesis[] = []; + for (const { hypothesis, verdict } of verdicts) { + switch (verdict.kind) { + case "passed": + propagatable.push(hypothesis); + break; + case "needs-revision": + if (verdict.suggestions.length > 0) { + propagatable.push(hypothesis); + } + break; + case "failed": + // Excluded from propagation per pairing-tracker propagatableEmissionIds rule + break; + case "infrastructure-error": + // Excluded; infrastructure failures don't reflect hypothesis quality + break; + } + } + + if (propagatable.length < config.minPropagatable) { + return { + ok: false, + feedback: { + kind: "MaxCyclesReached", + cyclesCompleted: cycleIndex, + }, + }; + } + + // Step 4: rank via caller-injected TrueSkill + let ranked: ReadonlyArray>; + try { + ranked = await callbacks.rankSurvivors(propagatable); + } catch (err) { + return { + ok: false, + feedback: { kind: "RankingFailure", reason: err instanceof Error ? err.message : String(err) }, + }; + } + + // Step 5: evolve top-N via caller-injected evolution + const topN = ranked.slice(0, config.topNToEvolve); + let refined: ReadonlyArray>; + try { + refined = await callbacks.evolveSurvivors(topN, cycleIndex + 1); + } catch (err) { + return { + ok: false, + feedback: { kind: "EvolutionFailure", reason: err instanceof Error ? err.message : String(err) }, + }; + } + + return { + ok: true, + refined, + cycleIndex: cycleIndex + 1, + }; +} + +/** + * Run multiple closed-loop iteration cycles until termination + * condition (max cycles OR propagatable drops below minimum OR + * caller-supplied predicate returns false). + * + * Returns the final cycle's refined hypotheses + the cycle count completed. 
+ */ +export interface LoopTermination { + readonly terminatedAtCycle: number; + readonly reason: "max-cycles" | "insufficient-propagatable" | "predicate-stopped" | "error"; + readonly finalHypotheses: ReadonlyArray>; + readonly feedback?: LoopFeedback; +} + +export async function runLoop( + initialHypotheses: ReadonlyArray>, + callbacks: LoopCallbacks, + config: LoopConfig = DEFAULT_LOOP_CONFIG, + shouldContinue?: (cycleIndex: number, current: ReadonlyArray>) => boolean, +): Promise> { + let current = initialHypotheses; + let cycleIndex = 0; + + while (cycleIndex < config.maxCycles) { + if (shouldContinue && !shouldContinue(cycleIndex, current)) { + return { + terminatedAtCycle: cycleIndex, + reason: "predicate-stopped", + finalHypotheses: current, + }; + } + + const result = await runCycle(current, callbacks, cycleIndex, config); + if (!result.ok) { + if (result.feedback.kind === "MaxCyclesReached") { + return { + terminatedAtCycle: cycleIndex, + reason: "insufficient-propagatable", + finalHypotheses: current, + feedback: result.feedback, + }; + } + return { + terminatedAtCycle: cycleIndex, + reason: "error", + finalHypotheses: current, + feedback: result.feedback, + }; + } + current = result.refined; + cycleIndex = result.cycleIndex; + } + + return { + terminatedAtCycle: cycleIndex, + reason: "max-cycles", + finalHypotheses: current, + }; +} From bd54a7fbeb671d4b592a13da671732865c6af989 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 28 May 2026 07:31:39 -0400 Subject: [PATCH 2/2] fix(B-0914.2): address 7 Copilot review threads on PR #5769 - Replace 'Aaron' with 'human maintainer' role-ref per AGENT-BEST-PRACTICES (Otto-279) - Fix broken rule-path xrefs (full filenames for monad-propagation + asymmetric-authorship) - Split LoopFeedback: introduce InsufficientPropagatable variant separate from MaxCyclesReached - Update runLoop to map InsufficientPropagatable -> insufficient-propagatable termination - Add assertNever default in exhaustiveness tests (compile-time 
guard now real) - Tighten integration test: deterministic insufficient-propagatable at cycle 1 16 tests pass. Co-Authored-By: Claude Opus 4.7 --- tools/workflow-engine/closed-loop.test.ts | 27 +++++++++++++++-------- tools/workflow-engine/closed-loop.ts | 22 +++++++++++------- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/tools/workflow-engine/closed-loop.test.ts b/tools/workflow-engine/closed-loop.test.ts index c47ea7c263..61216d8b41 100644 --- a/tools/workflow-engine/closed-loop.test.ts +++ b/tools/workflow-engine/closed-loop.test.ts @@ -116,7 +116,7 @@ describe("B-0914.2 closed-loop orchestrator", () => { expect(rankedCount).toBe(2); // good + revise both propagate; bad excluded }); - it("runCycle returns MaxCyclesReached when propagatable below minimum", async () => { + it("runCycle returns InsufficientPropagatable when propagatable below minimum", async () => { const hs = [hypothesis("h1", "alpha")]; const callbacks: LoopCallbacks = { dispatchCi: failingCi, // all fail @@ -126,7 +126,7 @@ describe("B-0914.2 closed-loop orchestrator", () => { const result = await runCycle(hs, callbacks, 0); expect(result.ok).toBe(false); if (result.ok) return; - expect(result.feedback.kind).toBe("MaxCyclesReached"); + expect(result.feedback.kind).toBe("InsufficientPropagatable"); }); it("runCycle returns CiDispatchFailure on CI exception", async () => { @@ -236,21 +236,27 @@ describe("B-0914.2 closed-loop orchestrator", () => { it("LoopFeedback exhaustive switch (compile-time check)", () => { type Feedback = NonNullable>>> extends { ok: false; feedback: infer F } ? 
F : never; + const assertNever = (x: never): never => { throw new Error(`unhandled LoopFeedback: ${JSON.stringify(x)}`); }; const acknowledge = (f: Feedback): string => { switch (f.kind) { case "EmptyHypothesisSet": case "CiDispatchFailure": case "RankingFailure": case "EvolutionFailure": + case "InsufficientPropagatable": case "MaxCyclesReached": return f.kind; + default: + return assertNever(f); } }; expect(acknowledge({ kind: "EmptyHypothesisSet" })).toBe("EmptyHypothesisSet"); expect(acknowledge({ kind: "CiDispatchFailure", hypothesisId: "x", reason: "y" })).toBe("CiDispatchFailure"); + expect(acknowledge({ kind: "InsufficientPropagatable", propagatableCount: 0, minRequired: 1, cycleIndex: 0 })).toBe("InsufficientPropagatable"); }); it("CiVerdict exhaustive switch (compile-time check)", () => { + const assertNever = (x: never): never => { throw new Error(`unhandled CiVerdict: ${JSON.stringify(x)}`); }; const acknowledge = (v: CiVerdict): string => { switch (v.kind) { case "passed": @@ -258,6 +264,8 @@ describe("B-0914.2 closed-loop orchestrator", () => { case "needs-revision": case "infrastructure-error": return v.kind; + default: + return assertNever(v); } }; expect(acknowledge({ kind: "passed" })).toBe("passed"); @@ -278,12 +286,13 @@ describe("B-0914.2 closed-loop orchestrator", () => { evolveSurvivors: mockEvolve, }; const termination = await runLoop(hs, callbacks, { ...DEFAULT_LOOP_CONFIG, maxCycles: 2 }); - // After cycle 0: h3-bad fails, h1-good + h2-good propagate, evolve to 1 variant - // After cycle 1: 1 variant passes (passingCi-effect from "evolved-..." 
not -good/-bad pattern; - // wait: mockEvolve produces ids like "evolved-cycle-N" which doesn't end in -good/-bad/-revise; - // mixedCi falls through to "failed" by default for non-matching ids; - // so cycle 1 has 1 hypothesis that fails → terminates as insufficient-propagatable) - expect(termination.terminatedAtCycle).toBeGreaterThanOrEqual(1); - expect(["insufficient-propagatable", "max-cycles"]).toContain(termination.reason); + // Cycle 0: h3-bad fails, h1-good + h2-good propagate, evolve to 1 variant + // ("evolved-cycle-1") via mockEvolve. + // Cycle 1: mixedCi falls through to "failed" for "evolved-cycle-*" ids + // (no -good/-bad/-revise suffix), so propagatable.length = 0 < minPropagatable=1. + // Terminates deterministically as insufficient-propagatable at cycle 1. + expect(termination.terminatedAtCycle).toBe(1); + expect(termination.reason).toBe("insufficient-propagatable"); + expect(termination.feedback?.kind).toBe("InsufficientPropagatable"); }); }); diff --git a/tools/workflow-engine/closed-loop.ts b/tools/workflow-engine/closed-loop.ts index c587293dbf..ad2ed09433 100644 --- a/tools/workflow-engine/closed-loop.ts +++ b/tools/workflow-engine/closed-loop.ts @@ -9,9 +9,10 @@ * - CI-result dispatch (via callbacks; integrates with B-0891 zflash * test-harness substrate when wired by caller) * - * Per Aaron 2026-05-28 'S M L all please in that order lol' — L (large - * scope) in the substrate-engineering ship-sequence. Wire-up that turns - * the tournament-loop substrate into a live closed-loop iteration system. + * Per human maintainer 2026-05-28 'S M L all please in that order lol' — L + * (large scope) in the substrate-engineering ship-sequence. Wire-up that + * turns the tournament-loop substrate into a live closed-loop iteration + * system. * * Design: pure loop-orchestration substrate with INJECTABLE callbacks * for substrate-specific operations (ranking / evolution / verification). 
@@ -34,8 +35,10 @@ * - B-0891 zflash test-harness substrate (caller can wire CI dispatch * to actual test runners per determineRunnability discriminator) * - B-0867 workflow engine substrate - * - .claude/rules/monad-propagation-pattern (Result) - * - .claude/rules/asymmetric-authorship (each callback authors own TFeedback) + * - .claude/rules/monad-propagation-pattern-cross-language-substrate-shape.md + * (Result) + * - .claude/rules/asymmetric-authorship-substrate-entity-defines-consent-channel-recipient-acknowledges.md + * (each callback authors own TFeedback) * * PoC scope: pure orchestration logic with injectable callbacks. Real * CI integration (via tools/ci/ + B-0891) handled by caller wiring. @@ -72,6 +75,7 @@ export type LoopFeedback = | { kind: "CiDispatchFailure"; hypothesisId: string; reason: string } | { kind: "RankingFailure"; reason: string } | { kind: "EvolutionFailure"; reason: string } + | { kind: "InsufficientPropagatable"; propagatableCount: number; minRequired: number; cycleIndex: number } | { kind: "MaxCyclesReached"; cyclesCompleted: number }; /** @@ -194,8 +198,10 @@ export async function runCycle( return { ok: false, feedback: { - kind: "MaxCyclesReached", - cyclesCompleted: cycleIndex, + kind: "InsufficientPropagatable", + propagatableCount: propagatable.length, + minRequired: config.minPropagatable, + cycleIndex, }, }; } @@ -264,7 +270,7 @@ export async function runLoop( const result = await runCycle(current, callbacks, cycleIndex, config); if (!result.ok) { - if (result.feedback.kind === "MaxCyclesReached") { + if (result.feedback.kind === "InsufficientPropagatable") { return { terminatedAtCycle: cycleIndex, reason: "insufficient-propagatable",