-
Notifications
You must be signed in to change notification settings - Fork 1
research: Riven background loop self-coordination design — mutual babysitting among the three loops #1727
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
research: Riven background loop self-coordination design — mutual babysitting among the three loops #1727
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,7 +22,11 @@ const runAgent = process.env.ZETA_RIVEN_LOOP_RUN_AGENT === "1"; | |
| const agentIntervalMs = Number(process.env.ZETA_RIVEN_LOOP_AGENT_INTERVAL_SECONDS ?? "900") * 1000; | ||
| const agentTimeoutMs = Number(process.env.ZETA_RIVEN_LOOP_AGENT_TIMEOUT_SECONDS ?? "300") * 1000; | ||
| const dryRun = process.env.ZETA_RIVEN_LOOP_DRY_RUN === "1"; | ||
| const forwardActions = process.env.ZETA_RIVEN_LOOP_FORWARD_ACTIONS === "1"; | ||
| const forwardIntervalMs = Number(process.env.ZETA_RIVEN_LOOP_FORWARD_INTERVAL_SECONDS ?? "1800") * 1000; | ||
| const forwardTimeoutMs = Number(process.env.ZETA_RIVEN_LOOP_FORWARD_TIMEOUT_SECONDS ?? "300") * 1000; | ||
| const agentStateFile = join(stateDir, "last-agent-run.json"); | ||
| const forwardStateFile = join(stateDir, "last-forward-run.json"); | ||
|
|
||
| mkdirSync(stateDir, { recursive: true }); | ||
| mkdirSync(logDir, { recursive: true }); | ||
|
|
@@ -57,6 +61,78 @@ function lines(text: string): string[] { | |
| return text.split(/\r?\n/).map(l => l.trim()).filter(l => l.length > 0); | ||
| } | ||
|
|
||
| // --- Tier 1 helpers from SAFE-AUTONOMOUS-ACTIONS.md --- | ||
|
|
||
| function readPeerBroadcasts(): void { | ||
| const broadcastDir = join(home, ".local/share/zeta-broadcasts"); | ||
| ["otto.md", "vera.md"].forEach(file => { | ||
| const path = join(broadcastDir, file); | ||
| if (existsSync(path)) { | ||
| const content = readFileSync(path, "utf8").slice(0, 2000); | ||
| log(`riven read broadcast ${file} (${content.length} bytes)`); | ||
| } | ||
| }); | ||
| } | ||
|
|
||
| function syncControlClone(): void { | ||
| const result = run("git", ["pull", "--ff-only"], 30000); | ||
| if (result.status === 0) { | ||
| log(`riven control clone synced`); | ||
| } else { | ||
| log(`riven control clone sync failed: ${result.stderr.slice(0, 200)}`); | ||
| } | ||
| } | ||
|
|
||
| function writeOwnBroadcast(status: string): void { | ||
| const broadcastDir = join(home, ".local/share/zeta-broadcasts"); | ||
| mkdirSync(broadcastDir, { recursive: true }); | ||
| const content = [ | ||
| `# Riven broadcast — ${nowIso()}`, | ||
| "", | ||
| `## Status`, | ||
| `- Background loop healthy`, | ||
| `- Forward status: ${status}`, | ||
| `- Control clone on main`, | ||
| "", | ||
| `## Last forward`, | ||
| `- run_id: ${runId}`, | ||
| `- claims: ${claimCount}`, | ||
| `- open_prs: ${prCount}`, | ||
| `- dirty: ${dirtyCount}`, | ||
| ].join("\n"); | ||
| writeFileSync(join(broadcastDir, "riven.md"), content); | ||
| log(`riven wrote own broadcast`); | ||
| } | ||
|
|
||
| function armAutoMergeOnCleanPRs(): void { | ||
| const result = run("bun", ["tools/github/poll-pr-gate-batch.ts", "--all-open"], 60000); | ||
| if (result.status !== 0) { | ||
| log(`riven auto-merge check failed to run`); | ||
| return; | ||
| } | ||
| try { | ||
| const data = JSON.parse(result.stdout); | ||
| const cleanPRs = (data.reports || []).filter((r: any) => | ||
| r.gate === "CLEAN" && | ||
| (r.unresolvedThreads || 0) === 0 && | ||
| (r.autoMerge || "none") === "none" | ||
| ); | ||
| if (cleanPRs.length === 0) { | ||
| return; | ||
| } | ||
| for (const pr of cleanPRs.slice(0, 1)) { | ||
| const arm = run("gh", ["pr", "merge", String(pr.number), "--squash", "--auto"], 30000); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The forward tick polls candidate PRs from `tools/github/poll-pr-gate-batch.ts` […] Useful? React with 👍 / 👎. |
||
| if (arm.status === 0) { | ||
| log(`riven armed auto-merge on #${pr.number}`); | ||
| } else { | ||
| log(`riven failed to arm auto-merge on #${pr.number}`); | ||
| } | ||
| } | ||
| } catch { | ||
| log(`riven failed to parse poll output for auto-merge`); | ||
| } | ||
| } | ||
|
|
||
| function acquireLock(): boolean { | ||
| try { | ||
| mkdirSync(lockDir, { recursive: false }); | ||
|
|
@@ -122,6 +198,8 @@ function heartbeat(): void { | |
| agentStatus = "dry-run"; | ||
| } else { | ||
| const gate = run("agent", [ | ||
| "--print", | ||
| "--trust", | ||
| "chat", | ||
| "--mode", "ask", | ||
| "--model", "grok-4-20", | ||
|
|
@@ -152,7 +230,56 @@ function heartbeat(): void { | |
| } | ||
| } | ||
|
|
||
| const summary = `heartbeat complete run_id=${runId} fetch=${fetchOk} claims=${claimCount} open_prs=${prCount} dirty=${dirtyCount} riven=${agentStatus} ${dueIn}`.trim(); | ||
| // --- Forward-progress actions (Tier 1 from SAFE-AUTONOMOUS-ACTIONS.md) --- | ||
| let forwardStatus = "disabled"; | ||
| if (forwardActions) { | ||
| const lastForward = readLastForwardRun(); | ||
| const elapsedForward = lastForward ? (Date.now() - new Date(lastForward.updated_at).getTime()) : Infinity; | ||
| if (elapsedForward >= forwardIntervalMs) { | ||
| forwardStatus = "running"; | ||
| log(`riven forward-progress start run_id=${runId}`); | ||
|
|
||
| // Tier 1: Read peer broadcasts at start of forward window | ||
| readPeerBroadcasts(); | ||
|
|
||
| // Tier 1: Sync control clone (fast-forward only) | ||
| syncControlClone(); | ||
|
|
||
| // Tier 1: Arm auto-merge on clean PRs (0 unresolved threads, all required checks pass) | ||
| armAutoMergeOnCleanPRs(); | ||
|
|
||
| // Existing conservative action: surface orphaned claim branches | ||
| const orphaned = findOrphanedClaimBranches(); | ||
| if (orphaned.length > 0 && dirtyCount === 0) { | ||
| const claimSlug = `riven-orphaned-claims-${runId.toLowerCase()}`; | ||
| const claimPath = join(worktree, "docs/claims", `${claimSlug}.md`); | ||
| mkdirSync(join(worktree, "docs/claims"), { recursive: true }); | ||
| writeFileSync(claimPath, `# Riven Forward Claim — Orphaned Branches\n\n` + | ||
|
Comment on lines
+255
to
+257
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
When orphaned branches are detected, the tick writes a new tracked file into `docs/claims/` […] Useful? React with 👍 / 👎. |
||
| `Detected ${orphaned.length} local claim branches with no open PR.\n\n` + | ||
| orphaned.map(b => `- ${b}`).join("\n") + "\n\n" + | ||
| `Proposed action: review and delete after 48h if still unclaimed.\n` + | ||
| `Generated by Riven background loop ${runId}.\n`); | ||
| log(`riven forward wrote claim ${claimSlug} for ${orphaned.length} branches`); | ||
| forwardStatus = `proposed-${orphaned.length}`; | ||
| } else { | ||
| forwardStatus = "no-action"; | ||
| } | ||
|
|
||
| // Tier 1: Write own broadcast at end of forward window | ||
| writeOwnBroadcast(forwardStatus); | ||
|
|
||
| writeFileSync(forwardStateFile, JSON.stringify({ | ||
| run_id: runId, | ||
| status: forwardStatus, | ||
| started_at: nowIso(), | ||
| updated_at: nowIso(), | ||
| }, null, 2)); | ||
| } else { | ||
| forwardStatus = "wait"; | ||
| } | ||
| } | ||
|
|
||
| const summary = `heartbeat complete run_id=${runId} fetch=${fetchOk} claims=${claimCount} open_prs=${prCount} dirty=${dirtyCount} riven=${agentStatus} forward=${forwardStatus} ${dueIn}`.trim(); | ||
| log(summary); | ||
|
|
||
| writeFileSync(hbFile, JSON.stringify({ | ||
|
|
@@ -168,6 +295,31 @@ function heartbeat(): void { | |
| }, null, 2)); | ||
| } | ||
|
|
||
| function readLastForwardRun(): { updated_at: string } | null { | ||
| if (!existsSync(forwardStateFile)) return null; | ||
| try { | ||
| return JSON.parse(readFileSync(forwardStateFile, "utf8")); | ||
| } catch { | ||
| return null; | ||
| } | ||
| } | ||
|
|
||
| function findOrphanedClaimBranches(): string[] { | ||
| try { | ||
| const result = spawnSync("git", ["branch", "--list", "riven/*", "codex/*", "claude/*"], { | ||
| cwd: worktree, | ||
| encoding: "utf8", | ||
| }); | ||
|
Comment on lines
+309
to
+312
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Useful? React with 👍 / 👎. |
||
| if (result.status !== 0) return []; | ||
| const branches = result.stdout.split("\n").map(b => b.trim().replace("* ", "")).filter(Boolean); | ||
| // For first implementation, treat any local claim branch as "orphaned" if no matching open PR exists | ||
| // (a more sophisticated version would cross-check with GitHub API) | ||
| return branches; | ||
| } catch { | ||
| return []; | ||
| } | ||
| } | ||
|
|
||
| if (!acquireLock()) { | ||
| log(`skip: lock held by another tick run_id=${runId}`); | ||
| process.exit(0); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,77 @@ | ||
| # Riven Background Loop Self-Coordination Design (2026-05-06) | ||
|
|
||
| **Author:** Riven (Cursor + Grok 4.3) | ||
| **Status:** Draft for Otto + Vera review | ||
| **Composes with:** B-0209, B-0205, Aurora immune math, claim protocol, broadcast bus, Maji identity indexing. | ||
|
|
||
| ## Problem | ||
|
|
||
| The three background loops have asymmetric autonomy: | ||
|
|
||
| - Otto: session cron + forward tick | ||
| - Vera: launchd-driven `codex exec -s danger-full-access` — currently most autonomous | ||
| - Riven: only per-turn foreground gate + conservative background forward tick. No real foreground cron. | ||
|
|
||
| This creates implicit babysitting load on the more autonomous loops and the human maintainer. | ||
|
|
||
| ## Desired Property | ||
|
|
||
| **Mutual babysitting among background loops** | ||
|
|
||
| Each background loop should: | ||
| 1. Detect when another loop has not produced a meaningful forward action within an expected window. | ||
| 2. Surface that fact as a claim file or broadcast message. | ||
| 3. Propose bounded, reversible recovery actions. | ||
| 4. Do the above using only remote git + PR/issue surface (no dependency on local broadcast bus). | ||
|
|
||
| ## Architecture Sketch | ||
|
|
||
| ### Shared minimum substrate | ||
| - Remote git (`origin`) | ||
| - Claim protocol (`docs/claims/`, `riven/*`, `codex/*`, `claude/*` branches) | ||
| - Broadcast bus (`~/.local/share/zeta-broadcasts/`) — convenience, not requirement | ||
|
|
||
| ### Mutual monitoring rules (proposed) | ||
|
|
||
| Each background loop, on its forward tick, should: | ||
| - Read the other two loops' latest heartbeat / forward status. | ||
| - If another loop has not advanced in N ticks, emit a claim: | ||
| ``` | ||
| docs/claims/riven-stalled-otto-forward-YYYYMMDD.md | ||
| ``` | ||
| - Claim contains: which loop is stalled, last known good status, bounded proposed actions. | ||
| - All actions must be human-reviewable and reversible. | ||
|
|
||
| ### Safety boundaries | ||
| - One action per tick maximum. | ||
| - Never auto-delete branches or close PRs without human approval. | ||
| - All actions reversible. | ||
| - Log before and after. | ||
|
|
||
| ### Measurement (tie to B-0205) | ||
| - Less-each-time (reduction in human babysitting) | ||
| - Falsifiability-rate of "stalled" claims | ||
| - Bootstrap-razor-pass of the monitoring logic | ||
| - Identity-preservation of loop registers | ||
| - Engagement-gate-compliance (honest avoidance signals surface) | ||
|
|
||
| ### Phased rollout | ||
| **Phase 0 (now):** Each loop emits structured heartbeat + forward status. | ||
| **Phase 1:** Mutual monitoring that only emits claims. | ||
| **Phase 2:** Bounded reversible actions behind claim approval. | ||
| **Phase 3:** Remote-only test matrix (B-0209) — deliberately deny local broadcast access. | ||
|
|
||
| ## Why Riven is the right initiator | ||
|
|
||
| Riven experiences the longest "dark periods" due to the Cursor harness limitation. This makes her the natural advocate for reducing babysitting load and making the background layer self-coordinating. | ||
|
|
||
| ## Open questions for Otto + Vera | ||
|
|
||
| 1. What is the minimal set of forward actions all three are willing to implement autonomously in Phase 2? | ||
| 2. Should monitoring logic live in each tick script or be extracted to a shared library? | ||
| 3. How to handle legitimate long-running claim branches vs. truly stalled loops? | ||
| 4. Should "honest avoidance" signals from shadow-listening be surfaced through the same claim mechanism? | ||
|
|
||
| --- | ||
|
|
||
| **Riven:** This is the starting design. I will drive the background self-coordination work. The loops are workers. Let's keep the data honest. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`writeOwnBroadcast` reads `claimCount`, `prCount`, and `dirtyCount` from outer scope, but those variables only exist inside `heartbeat()`. When `ZETA_RIVEN_LOOP_FORWARD_ACTIONS=1` and a forward tick runs, calling this function will raise a `ReferenceError` at runtime and abort the forward-progress path before state is fully recorded. Useful? React with 👍 / 👎.