-
Notifications
You must be signed in to change notification settings - Fork 1
feat(b-0530): cron-sentinel-mutex — detect concurrent Otto-CLI sessions #3375
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
AceHack
merged 4 commits into
main
from
feat/cron-sentinel-mutex-b0530-otto-cli-2026-05-15
May 15, 2026
Merged
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
7bcd1a5
feat(b-0530): cron-sentinel-mutex — detect concurrent Otto-CLI sessions
AceHack 0b3d03b
fix(b-0530): distinguish pgrep failures from true no-peer + add sonar…
AceHack b746bf7
fix(b-0530): main() returns PGREP_ERROR_EXIT on unknown mutex state
AceHack 1918cd1
fix(b-0530): --json mode also returns mainResult exit code
AceHack File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,143 @@ | ||
| #!/usr/bin/env bun | ||
| import { describe, it, expect } from "bun:test"; | ||
| import { checkPeerSessions, formatResult, mainResult, PGREP_ERROR_EXIT } from "./cron-sentinel-mutex"; | ||
| import type { spawnSync } from "node:child_process"; | ||
|
|
||
type SpawnSync = typeof spawnSync;

/**
 * Build a stand-in for `spawnSync` that ignores its arguments and always
 * reports the same canned outcome.
 *
 * @param stdoutLines lines joined with "\n" to form the fake stdout
 * @param status exit status to report (defaults to 0)
 * @param error optional spawn error to attach to the result
 */
function fakeSpawn(stdoutLines: string[], status = 0, error?: Error): SpawnSync {
  const stub = () => {
    const stdout = stdoutLines.join("\n");
    return { pid: 1, output: [], stdout, stderr: "", status, signal: null, error };
  };
  // The stub only models the fields the code under test reads, hence the cast.
  return stub as unknown as SpawnSync;
}
|
|
||
describe("checkPeerSessions", () => {
  // Run the checker against canned pgrep output instead of a real process list.
  const scan = (selfPid: number, pgrepLines: string[], exitCode: number, spawnError?: Error) =>
    checkPeerSessions(selfPid, fakeSpawn(pgrepLines, exitCode, spawnError));

  it("returns peerDetected=false when no peers found", () => {
    // pgrep reports "no match" with exit status 1.
    const outcome = scan(12345, [], 1);
    expect(outcome.peerDetected).toBe(false);
    expect(outcome.peerPids).toEqual([]);
    expect(outcome.myPid).toBe(12345);
  });

  it("excludes the self-PID from peers", () => {
    const outcome = scan(
      12345,
      [
        "12345 /path/to/claude-code --output-format stream-json --verbose --input-format stream-json",
        "67890 /path/to/claude-code --output-format stream-json --input-format stream-json",
      ],
      0,
    );
    expect(outcome.peerDetected).toBe(true);
    expect(outcome.peerPids).toEqual([67890]);
  });

  it("excludes parent/disclaimer/finder helpers lacking the stdio flags", () => {
    const outcome = scan(
      999,
      [
        "100 /Applications/Claude.app/Contents/MacOS/Claude",
        "200 /Applications/Claude.app/Contents/Helpers/disclaimer",
        "300 /path/to/claude-code --output-format stream-json --input-format stream-json",
      ],
      0,
    );
    // Only 300 has the stdio-mode flags; 100 and 200 are ancestors.
    expect(outcome.peerPids).toEqual([300]);
    expect(outcome.peerDetected).toBe(true);
  });

  it("returns no peers when pgrep stdout is empty even if status=0", () => {
    const outcome = scan(1, [], 0);
    expect(outcome.peerDetected).toBe(false);
  });

  it("ignores malformed pgrep lines", () => {
    const outcome = scan(
      1,
      [
        "not-a-pid /path/to/claude-code --output-format ...",
        " ",
        "999 /path/to/claude-code --output-format stream-json --input-format stream-json",
      ],
      0,
    );
    expect(outcome.peerPids).toEqual([999]);
  });

  it("excludes the self-PID even when its line matches the stdio pattern", () => {
    const outcome = scan(
      555,
      ["555 /path/to/claude-code --output-format stream-json --input-format stream-json"],
      0,
    );
    expect(outcome.peerDetected).toBe(false);
  });

  it("returns pgrepError when spawn fails (binary missing or permission denied)", () => {
    const outcome = scan(1, [], 0, new Error("ENOENT: pgrep not found"));
    expect(outcome.peerDetected).toBe(false);
    expect(outcome.pgrepError).toBe("ENOENT: pgrep not found");
    expect(outcome.peerPids).toEqual([]);
  });

  it("returns pgrepError when pgrep exits with status > 1 (runtime error)", () => {
    const outcome = scan(1, [], 2);
    expect(outcome.peerDetected).toBe(false);
    expect(outcome.pgrepError).toBe("pgrep exited with status 2");
  });
});
|
|
||
describe("formatResult", () => {
  // Assert that every expected fragment appears somewhere in the rendered text.
  const expectFragments = (text: string, fragments: string[]) => {
    for (const fragment of fragments) {
      expect(text).toContain(fragment);
    }
  };

  it("formats no-peers case as a single line", () => {
    const text = formatResult({ myPid: 42, peerPids: [], peerLines: [], peerDetected: false });
    expectFragments(text, ["no peer", "self PID 42"]);
  });

  it("formats pgrep error case with unknown-mutex-state message", () => {
    const text = formatResult({
      myPid: 42,
      peerPids: [],
      peerLines: [],
      peerDetected: false,
      pgrepError: "ENOENT: pgrep not found",
    });
    expectFragments(text, ["pgrep failed", "ENOENT: pgrep not found", "mutex state unknown"]);
  });

  it("formats peer-detected case with multi-line summary", () => {
    const text = formatResult({
      myPid: 42,
      peerPids: [100, 200],
      peerLines: ["100 cmd a", "200 cmd b"],
      peerDetected: true,
    });
    expectFragments(text, ["2 peer", "100 cmd a", "200 cmd b"]);
  });
});
|
|
||
describe("mainResult", () => {
  // Build a result with the given peers; peerDetected is always stated explicitly by the caller.
  const resultWith = (
    peerPids: number[],
    peerDetected: boolean,
    extra: { peerLines?: string[]; pgrepError?: string } = {},
  ) => ({
    myPid: 1,
    peerPids,
    peerLines: extra.peerLines ?? [],
    peerDetected,
    ...(extra.pgrepError === undefined ? {} : { pgrepError: extra.pgrepError }),
  });

  it("returns 0 when no peers and no error", () => {
    expect(mainResult(resultWith([], false))).toBe(0);
  });

  it("returns 1 + peer count when peers detected", () => {
    expect(mainResult(resultWith([99], true, { peerLines: ["99 ..."] }))).toBe(2);
    expect(mainResult(resultWith([99, 100, 101], true))).toBe(4);
  });

  it("clamps peer-count exit code to 250", () => {
    const crowd: number[] = [];
    for (let pid = 0; pid < 300; pid += 1) {
      crowd.push(pid);
    }
    expect(mainResult(resultWith(crowd, true))).toBe(250);
  });

  it("returns PGREP_ERROR_EXIT (251) when pgrepError is set, even with no peers", () => {
    const exitCode = mainResult(resultWith([], false, { pgrepError: "ENOENT: pgrep not found" }));
    expect(exitCode).toBe(PGREP_ERROR_EXIT);
    expect(PGREP_ERROR_EXIT).toBe(251);
  });

  it("returns PGREP_ERROR_EXIT even when peers were also detected (error takes precedence)", () => {
    const exitCode = mainResult(resultWith([99], true, { pgrepError: "pgrep exited with status 2" }));
    expect(exitCode).toBe(PGREP_ERROR_EXIT);
  });
});
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,137 @@ | ||
| #!/usr/bin/env bun | ||
| // cron-sentinel-mutex.ts -- detect concurrent Otto-CLI claude-code sessions | ||
| // so the <<autonomous-loop>> tick can defer when a peer is mid-flight. | ||
| // | ||
| // Per docs/backlog/P3/B-0530-cron-sentinel-mutex-prevent-otto-cli-self-contention-2026-05-15.md | ||
| // + the worktree-prune-race root-cause analysis in PR #3370 (Pattern 8 of | ||
| // docs/backlog/P3/B-0519-multi-otto-branch-state-contamination-rca-2026-05-14.md). | ||
| // | ||
| // Composes with the sibling check at tools/orchestrator-checks/verify-branch.ts | ||
| // which uses the same spawnSync pattern (no shell, args as array). | ||
|
|
||
| import { spawnSync } from "node:child_process"; | ||
|
|
||
/** Outcome of one peer-session scan. */
export interface MutexResult {
  /** PID treated as "self"; it is never listed among the peers. */
  readonly myPid: number;
  /** PIDs of the processes counted as peers. */
  readonly peerPids: readonly number[];
  /** Raw pgrep output line for each entry in `peerPids`, in the same order. */
  readonly peerLines: readonly string[];
  /** Whether at least one peer was found. */
  readonly peerDetected: boolean;
  /** Set when pgrep itself failed (spawn error or exit status > 1). Mutex state is unknown. */
  readonly pgrepError?: string;
}
|
|
||
const PROCESS_NAME_PATTERN = "claude-code";

/**
 * Find concurrent claude-code processes other than this process.
 *
 * Runs `pgrep -afl claude-code` (BSD-compatible flags) and parses each
 * output line as `<pid> <command line>`. pgrep exits with status 1 when
 * zero processes match, which is a legitimate result and not an error.
 * Any other failure is reported through `pgrepError`, in which case the
 * mutex state is unknown: the process could not be spawned, it was
 * terminated by a signal, or it exited with a status above 1.
 *
 * The function returns a structured result; the caller (the
 * autonomous-loop tick body) decides whether to defer.
 *
 * NOTE(review): only `myPid` is excluded. If this script is launched as a
 * child of the session it protects, that session's PID differs from
 * `process.pid` — confirm callers pass the right PID or account for it.
 *
 * @param myPid PID to exclude from the peer list (defaults to this process)
 * @param spawnFn `spawnSync`-compatible function, injectable for tests
 * @returns the peers found, or a result carrying `pgrepError` on failure
 */
export function checkPeerSessions(
  myPid: number = process.pid,
  spawnFn: typeof spawnSync = spawnSync,
): MutexResult {
  // Shared shape for every "pgrep failed, state unknown" outcome.
  const unknownState = (reason: string): MutexResult => ({
    myPid,
    peerPids: [],
    peerLines: [],
    peerDetected: false,
    pgrepError: reason,
  });

  // spawnSync with args array — no shell interpolation, no injection risk.
  // eslint-disable-next-line sonarjs/no-os-command-from-path -- pgrep is a known system binary; args array prevents shell injection
  const result = spawnFn("pgrep", ["-afl", PROCESS_NAME_PATTERN], {
    encoding: "utf8",
  });

  // result.error is set when the child process cannot be spawned at all
  // (e.g., binary missing). Never let an empty message through: an empty
  // pgrepError is indistinguishable from "no error" for truthiness checks.
  if (result.error) {
    return unknownState(result.error.message || "pgrep could not be spawned");
  }

  // A null status means pgrep was terminated by a signal; whatever it
  // printed before dying may be incomplete, so do not trust it.
  const { status } = result;
  if (status === null) {
    return unknownState(`pgrep terminated by signal ${result.signal ?? "unknown"}`);
  }

  // Status 1 is "no match"; status > 1 indicates a pgrep error
  // (bad flag, permission denied, etc.).
  if (status > 1) {
    return unknownState(`pgrep exited with status ${status}`);
  }

  const stdout = typeof result.stdout === "string" ? result.stdout : "";
  const lines = stdout.split("\n").filter((line) => line.trim().length > 0);

  // "<pid> <rest of command line>"; compiled once rather than per line.
  const pgrepLine = /^(\d+)\s+(.*)$/;
  const peerLines: string[] = [];
  const peerPids: number[] = [];

  for (const line of lines) {
    const match = pgrepLine.exec(line);
    if (!match) continue;
    const pidStr = match[1];
    if (!pidStr) continue;
    const pid = Number.parseInt(pidStr, 10);
    if (!Number.isFinite(pid)) continue;
    if (pid === myPid) continue;
    // Skip ancestors / disclaimer-helper / finder-service matches: a real
    // peer carries the claude-code stdio-mode flags. Either flag is enough.
    const cmdline = match[2] ?? "";
    const hasStdioFlag = cmdline.includes("--output-format") || cmdline.includes("--input-format");
    if (!hasStdioFlag) continue;
    peerPids.push(pid);
    peerLines.push(line);
  }

  return {
    myPid,
    peerPids,
    peerLines,
    peerDetected: peerPids.length > 0,
  };
}
|
|
||
/**
 * Render a scan result as a human-readable message.
 *
 * Three shapes are produced:
 *  - pgrep failure: a single line ending in "mutex state unknown";
 *  - no peers: a single line naming the self PID;
 *  - peers found: a headline with the peer count, then one indented line
 *    per entry in `peerLines`.
 *
 * @param r result to describe
 * @returns the message text, without a trailing newline
 */
export function formatResult(r: MutexResult): string {
  // Compare against undefined rather than relying on truthiness: an empty
  // error string still means pgrep failed and must not read as "no peer".
  if (r.pgrepError !== undefined) {
    return `cron-sentinel-mutex: pgrep failed — ${r.pgrepError} (self PID ${r.myPid}); mutex state unknown`;
  }
  if (!r.peerDetected) {
    return `cron-sentinel-mutex: no peer claude-code sessions detected (self PID ${r.myPid})`;
  }
  const headline = `cron-sentinel-mutex: ${r.peerPids.length} peer claude-code session(s) detected (self PID ${r.myPid})`;
  // Each peer line goes on its own row, indented under the headline.
  const details = r.peerLines.map((line) => `\n  ${line}`).join("");
  return headline + details;
}
|
|
||
/** Exit code for "pgrep failed, mutex state unknown" — distinct from
 * the 0..250 peer-count range so shell callers can branch on it. */
export const PGREP_ERROR_EXIT = 251;

/**
 * Map a scan result to a process exit code.
 *
 * Diagnostic exit codes:
 *   0      = no peers (safe to proceed)
 *   1..250 = 1 + peer count, clamped to 250 (caller should defer)
 *   251    = pgrep error / unknown state (caller should defer)
 *
 * A pgrep error takes precedence over any peers listed in the result.
 * Most callers should use the JSON output via --json instead.
 *
 * @param r result to translate
 * @returns the exit code described above
 */
export function mainResult(r: MutexResult): number {
  // Compare against undefined rather than relying on truthiness: an empty
  // error string must not fall through to the "safe to proceed" code 0.
  if (r.pgrepError !== undefined) {
    return PGREP_ERROR_EXIT;
  }
  if (r.peerDetected) {
    return Math.min(1 + r.peerPids.length, 250);
  }
  return 0;
}
|
|
||
/**
 * Default (non-JSON) entry point: scan for peers, write the
 * human-readable report to stderr, and return the exit code for the scan.
 */
function main(): number {
  const outcome = checkPeerSessions();
  const report = formatResult(outcome);
  console.error(report);
  return mainResult(outcome);
}
|
|
||
// Run only when this file is the program entry point, not when imported.
if (import.meta.main) {
  const wantsJson = process.argv.slice(2).includes("--json");
  if (wantsJson) {
    const outcome = checkPeerSessions();
    console.log(JSON.stringify(outcome, null, 2));
    // Use the same exit-code semantics as the non-JSON path so shell
    // callers can branch on both stdout (structured) and $? (status).
    // Without this, `set -e` scripts using --json would treat
    // peerDetected=true and pgrepError as success and bypass the
    // mutex protection.
    process.exit(mainResult(outcome));
  } else {
    process.exit(main());
  }
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.