Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions tools/orchestrator-checks/cron-sentinel-mutex.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#!/usr/bin/env bun
import { describe, it, expect } from "bun:test";
import { checkPeerSessions, formatResult, mainResult, PGREP_ERROR_EXIT } from "./cron-sentinel-mutex";
import type { spawnSync } from "node:child_process";

/** Signature of the real `spawnSync`, so fakes can be passed wherever it is expected. */
type SpawnSync = typeof spawnSync;

/**
 * Build a stand-in for `spawnSync` that never launches a process.
 *
 * Every invocation of the returned function ignores its arguments and yields
 * a fresh canned result object.
 *
 * @param stdoutLines Lines that are newline-joined to form the fake `stdout`.
 * @param status Exit status to report (defaults to 0).
 * @param error Optional spawn error to attach to the result.
 * @returns A function typed as `spawnSync` that returns the canned result.
 */
function fakeSpawn(stdoutLines: string[], status = 0, error?: Error): SpawnSync {
  function cannedSpawn() {
    const stdout = stdoutLines.join("\n");
    return {
      pid: 1,
      output: [],
      stdout,
      stderr: "",
      status,
      signal: null,
      error,
    };
  }
  // The canned object is only a partial SpawnSyncReturns, hence the double cast.
  return cannedSpawn as unknown as SpawnSync;
}

describe("checkPeerSessions", () => {
  // pgrep-style output line for a claude-code session carrying both stdio-mode flags.
  const stdioPeerLine = (pid: number): string =>
    `${pid} /path/to/claude-code --output-format stream-json --input-format stream-json`;

  it("returns peerDetected=false when no peers found", () => {
    // Exit status 1 is pgrep's "nothing matched" result, not a failure.
    const outcome = checkPeerSessions(12345, fakeSpawn([], 1));
    expect(outcome.peerDetected).toBe(false);
    expect(outcome.peerPids).toEqual([]);
    expect(outcome.myPid).toBe(12345);
  });

  it("excludes the self-PID from peers", () => {
    const pgrepOutput = [
      "12345 /path/to/claude-code --output-format stream-json --verbose --input-format stream-json",
      stdioPeerLine(67890),
    ];
    const { peerDetected, peerPids } = checkPeerSessions(12345, fakeSpawn(pgrepOutput, 0));
    expect(peerDetected).toBe(true);
    expect(peerPids).toEqual([67890]);
  });

  it("excludes parent/disclaimer/finder helpers lacking the stdio flags", () => {
    const pgrepOutput = [
      "100 /Applications/Claude.app/Contents/MacOS/Claude",
      "200 /Applications/Claude.app/Contents/Helpers/disclaimer",
      stdioPeerLine(300),
    ];
    const { peerPids, peerDetected } = checkPeerSessions(999, fakeSpawn(pgrepOutput, 0));
    // Only 300 has the stdio-mode flags; 100 and 200 are ancestors.
    expect(peerPids).toEqual([300]);
    expect(peerDetected).toBe(true);
  });

  it("returns no peers when pgrep stdout is empty even if status=0", () => {
    const outcome = checkPeerSessions(1, fakeSpawn([], 0));
    expect(outcome.peerDetected).toBe(false);
  });

  it("ignores malformed pgrep lines", () => {
    const pgrepOutput = [
      "not-a-pid /path/to/claude-code --output-format ...",
      " ",
      stdioPeerLine(999),
    ];
    const outcome = checkPeerSessions(1, fakeSpawn(pgrepOutput, 0));
    expect(outcome.peerPids).toEqual([999]);
  });

  it("excludes the self-PID even when its line matches the stdio pattern", () => {
    const selfPid = 555;
    const outcome = checkPeerSessions(selfPid, fakeSpawn([stdioPeerLine(selfPid)], 0));
    expect(outcome.peerDetected).toBe(false);
  });

  it("returns pgrepError when spawn fails (binary missing or permission denied)", () => {
    const spawnFailure = new Error("ENOENT: pgrep not found");
    const { peerDetected, pgrepError, peerPids } = checkPeerSessions(1, fakeSpawn([], 0, spawnFailure));
    expect(peerDetected).toBe(false);
    expect(pgrepError).toBe("ENOENT: pgrep not found");
    expect(peerPids).toEqual([]);
  });

  it("returns pgrepError when pgrep exits with status > 1 (runtime error)", () => {
    const { peerDetected, pgrepError } = checkPeerSessions(1, fakeSpawn([], 2));
    expect(peerDetected).toBe(false);
    expect(pgrepError).toBe("pgrep exited with status 2");
  });
});

describe("formatResult", () => {
  it("formats no-peers case as a single line", () => {
    const noPeers = { myPid: 42, peerPids: [], peerLines: [], peerDetected: false };
    const text = formatResult(noPeers);
    expect(text).toContain("no peer");
    expect(text).toContain("self PID 42");
  });

  it("formats pgrep error case with unknown-mutex-state message", () => {
    const pgrepFailed = {
      myPid: 42,
      peerPids: [],
      peerLines: [],
      peerDetected: false,
      pgrepError: "ENOENT: pgrep not found",
    };
    const text = formatResult(pgrepFailed);
    expect(text).toContain("pgrep failed");
    expect(text).toContain("ENOENT: pgrep not found");
    expect(text).toContain("mutex state unknown");
  });

  it("formats peer-detected case with multi-line summary", () => {
    const twoPeers = {
      myPid: 42,
      peerPids: [100, 200],
      peerLines: ["100 cmd a", "200 cmd b"],
      peerDetected: true,
    };
    const text = formatResult(twoPeers);
    expect(text).toContain("2 peer");
    expect(text).toContain("100 cmd a");
    expect(text).toContain("200 cmd b");
  });
});

describe("mainResult", () => {
  it("returns 0 when no peers and no error", () => {
    const clean = { myPid: 1, peerPids: [], peerLines: [], peerDetected: false };
    expect(mainResult(clean)).toBe(0);
  });

  it("returns 1 + peer count when peers detected", () => {
    const onePeer = { myPid: 1, peerPids: [99], peerLines: ["99 ..."], peerDetected: true };
    const threePeers = { myPid: 1, peerPids: [99, 100, 101], peerLines: [], peerDetected: true };
    expect(mainResult(onePeer)).toBe(2);
    expect(mainResult(threePeers)).toBe(4);
  });

  it("clamps peer-count exit code to 250", () => {
    // 300 synthetic PIDs (0..299) push 1 + count well past the cap.
    const manyPeers = [...Array(300).keys()];
    const crowded = { myPid: 1, peerPids: manyPeers, peerLines: [], peerDetected: true };
    expect(mainResult(crowded)).toBe(250);
  });

  it("returns PGREP_ERROR_EXIT (251) when pgrepError is set, even with no peers", () => {
    const failedNoPeers = {
      myPid: 1,
      peerPids: [],
      peerLines: [],
      peerDetected: false,
      pgrepError: "ENOENT: pgrep not found",
    };
    expect(mainResult(failedNoPeers)).toBe(PGREP_ERROR_EXIT);
    expect(PGREP_ERROR_EXIT).toBe(251);
  });

  it("returns PGREP_ERROR_EXIT even when peers were also detected (error takes precedence)", () => {
    const failedWithPeers = {
      myPid: 1,
      peerPids: [99],
      peerLines: [],
      peerDetected: true,
      pgrepError: "pgrep exited with status 2",
    };
    expect(mainResult(failedWithPeers)).toBe(PGREP_ERROR_EXIT);
  });
});
137 changes: 137 additions & 0 deletions tools/orchestrator-checks/cron-sentinel-mutex.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#!/usr/bin/env bun
// cron-sentinel-mutex.ts -- detect concurrent Otto-CLI claude-code sessions
// so the <<autonomous-loop>> tick can defer when a peer is mid-flight.
//
// Per docs/backlog/P3/B-0530-cron-sentinel-mutex-prevent-otto-cli-self-contention-2026-05-15.md
// + the worktree-prune-race root-cause analysis in PR #3370 (Pattern 8 of
// docs/backlog/P3/B-0519-multi-otto-branch-state-contamination-rca-2026-05-14.md).
//
// Composes with the sibling check at tools/orchestrator-checks/verify-branch.ts
// which uses the same spawnSync pattern (no shell, args as array).

import { spawnSync } from "node:child_process";

/**
 * Structured outcome of one peer-session scan, as returned by
 * checkPeerSessions and consumed by formatResult / mainResult.
 */
export interface MutexResult {
/** PID treated as "self"; checkPeerSessions never reports it as a peer. */
readonly myPid: number;
/** PIDs of the pgrep lines accepted as peers; empty when pgrep failed. */
readonly peerPids: readonly number[];
/** Raw pgrep output lines for the accepted peers; checkPeerSessions fills this in step with peerPids. */
readonly peerLines: readonly string[];
/** True when at least one peer was accepted (checkPeerSessions sets it to peerPids.length > 0). */
readonly peerDetected: boolean;
/** Set when pgrep itself failed (spawn error or exit status > 1). Mutex state is unknown. */
readonly pgrepError?: string;
}

/** Pattern handed to pgrep; matching lines are filtered further by checkPeerSessions. */
const PROCESS_NAME_PATTERN = "claude-code";

/**
 * Find concurrent claude-code processes other than this process.
 *
 * Runs `pgrep -afl` (BSD-compatible) with an args array and parses its
 * stdout. pgrep exits with status 1 when zero processes match, which is a
 * legitimate "no peers" result and not an error. Anything that leaves the
 * scan result untrustworthy is reported through `pgrepError` instead, so
 * callers can fail closed:
 *   - the child could not be spawned (`result.error` set),
 *   - the child was terminated by a signal (`status` is null),
 *   - pgrep exited with a status greater than 1.
 *
 * The function only reports; the caller (the autonomous-loop tick body)
 * decides whether to defer.
 *
 * @param myPid PID to exclude from the peer list (defaults to this process).
 * @param spawnFn `spawnSync`-compatible function; injectable for tests.
 * @returns The peers found, or a result with `pgrepError` set and no peers.
 */
export function checkPeerSessions(
  myPid: number = process.pid,
  spawnFn: typeof spawnSync = spawnSync,
): MutexResult {
  // Shared shape for every "pgrep could not be trusted" outcome.
  const unknownState = (pgrepError: string): MutexResult => ({
    myPid,
    peerPids: [],
    peerLines: [],
    peerDetected: false,
    pgrepError,
  });

  // spawnSync with args array — no shell interpolation, no injection risk.
  // eslint-disable-next-line sonarjs/no-os-command-from-path -- pgrep is a known system binary; args array prevents shell injection
  const result = spawnFn("pgrep", ["-afl", PROCESS_NAME_PATTERN], {
    encoding: "utf8",
  });

  // result.error is set when the child process cannot be spawned at all
  // (e.g., binary missing). Downstream code tests pgrepError for truthiness,
  // so an empty error message must not be passed through as "".
  if (result.error) {
    return unknownState(result.error.message || "pgrep could not be spawned");
  }
  // A null status means the child was terminated by a signal before it could
  // report an exit code; its output may be truncated, so do not read it as
  // "no peers".
  if (result.status === null) {
    return unknownState(`pgrep terminated by signal ${result.signal ?? "unknown"}`);
  }
  // Status 1 is "no match"; status > 1 indicates a pgrep error (bad flag, etc.).
  const exitStatus = result.status ?? 0;
  if (exitStatus > 1) {
    return unknownState(`pgrep exited with status ${exitStatus}`);
  }

  const stdout = typeof result.stdout === "string" ? result.stdout : "";
  // One output line: decimal PID, whitespace, then the command line.
  const linePattern = /^(\d+)\s+(.*)$/;

  const peerLines: string[] = [];
  const peerPids: number[] = [];

  for (const line of stdout.split("\n")) {
    if (line.trim().length === 0) continue;
    const match = linePattern.exec(line);
    if (!match) continue;
    const pidStr = match[1];
    if (!pidStr) continue;
    const pid = Number.parseInt(pidStr, 10);
    if (!Number.isFinite(pid)) continue;
    if (pid === myPid) continue;
    // Skip ancestors / disclaimer-helper / finder-service matches: a line
    // counts as a real peer only if it carries at least one of the
    // claude-code stdio-mode flags.
    const cmdline = match[2] ?? "";
    const hasStdioFlag =
      cmdline.includes("--output-format") || cmdline.includes("--input-format");
    if (!hasStdioFlag) continue;
    peerPids.push(pid);
    peerLines.push(line);
  }

  return {
    myPid,
    peerPids,
    peerLines,
    peerDetected: peerPids.length > 0,
  };
}

/**
 * Render a MutexResult as a human-readable summary.
 *
 * A set (non-empty) `pgrepError` takes precedence over everything else. With
 * no peers the summary is a single line; with peers, a headline is followed
 * by one line per entry in `peerLines`.
 *
 * @param r Scan result to describe.
 * @returns The summary text, without a trailing newline.
 */
export function formatResult(r: MutexResult): string {
  const { myPid, pgrepError, peerDetected, peerPids, peerLines } = r;

  if (pgrepError) {
    return `cron-sentinel-mutex: pgrep failed — ${pgrepError} (self PID ${myPid}); mutex state unknown`;
  }
  if (!peerDetected) {
    return `cron-sentinel-mutex: no peer claude-code sessions detected (self PID ${myPid})`;
  }

  const headline = `cron-sentinel-mutex: ${peerPids.length} peer claude-code session(s) detected (self PID ${myPid})`;
  // Joining headline and peer lines with the same separator puts each peer
  // on its own line after the headline, and yields the bare headline when
  // there are no peer lines.
  return [headline, ...peerLines].join("\n ");
}

/** Exit code for "pgrep failed, mutex state unknown" — distinct from
 * the 0..250 peer-count range so shell callers can branch on it. */
export const PGREP_ERROR_EXIT = 251;

/**
 * Translate a MutexResult into a diagnostic process exit code.
 *
 *   0      = no peers (safe to proceed)
 *   1..250 = min(1 + peer count, 250) (caller should defer)
 *   251    = pgrep error / unknown state (caller should defer)
 *
 * A set `pgrepError` takes precedence over any detected peers.
 * Most callers should use the JSON output via --json instead.
 *
 * @param r Scan result to translate.
 * @returns The exit code described above.
 */
export function mainResult(r: MutexResult): number {
  if (r.pgrepError) {
    return PGREP_ERROR_EXIT;
  }
  if (!r.peerDetected) {
    return 0;
  }
  // Clamp so a large peer count can never reach PGREP_ERROR_EXIT.
  return Math.min(1 + r.peerPids.length, 250);
}

/**
 * Default (non-JSON) entry point: scan for peers, write the human-readable
 * summary to stderr, and return the matching exit code.
 */
function main(): number {
  const scan = checkPeerSessions();
  const summary = formatResult(scan);
  console.error(summary);
  return mainResult(scan);
}

// Runs only when this file is executed directly, not when it is imported.
if (import.meta.main) {
  const wantsJson = process.argv.slice(2).includes("--json");
  let exitCode: number;
  if (wantsJson) {
    const scan = checkPeerSessions();
    console.log(JSON.stringify(scan, null, 2));
    // Use the same exit-code semantics as the non-JSON path so shell
    // callers can branch on both stdout (structured) and $? (status).
    // Without this, `set -e` scripts using --json would treat
    // peerDetected=true and pgrepError as success and bypass the
    // mutex protection.
    exitCode = mainResult(scan);
  } else {
    exitCode = main();
  }
  process.exit(exitCode);
}
Loading