diff --git a/.gitignore b/.gitignore index e9fb2ea1ba8..b3655579621 100644 --- a/.gitignore +++ b/.gitignore @@ -54,6 +54,10 @@ next-env.d.ts # Superset (track scripts/config; ignore generated workspace artifacts) .superset/ports.json .superset/config.local.json +# Fork-local: TODO autonomous agent runtime artifacts (goal.md, state files) +.superset/todo/ +# Fork-local: Claude Code's local worktree scratch dirs +.claude/worktrees/ # tsbuildinfo *.tsbuildinfo diff --git a/apps/desktop/plans/todo-agent-plan.md b/apps/desktop/plans/todo-agent-plan.md new file mode 100644 index 00000000000..213e7d6de0a --- /dev/null +++ b/apps/desktop/plans/todo-agent-plan.md @@ -0,0 +1,285 @@ +# TODO 自律エージェント 実装計画 + +フォーク内限定の機能。ワークスペースの `Run` ボタンの左側にボタンを追加し、 +ユーザーが定義した目標が検証可能な形で達成されるまで、無人で実行を続ける +自律的な Claude Code ループを起動できるようにする。実行中のワーカー端末は +常にライブで可視化され、ユーザーは必要に応じて介入できる。 + +## 目的 + +- ユーザーは (1) 何をしてほしいか と (2) 明確なゴール + (受け入れ判定コマンド)を入力するだけでよく、その後は追加の指示なしで + システムが Claude Code を完了まで動かす。 +- ライブ可視性: 実行中ワーカーは実際の PTY であり、既存の + `TerminalPane` コンポーネントで描画されるため、誰でも監視したり + 直接入力したりできる。 +- 信頼性: 完了判定は決定的な verify コマンドの終了コードで行い、 + LLM の自己申告には依存しない。 +- 逐次実行: 同時にアクティブなのは 1 タスクのみとし、それ以外はキューに入れる。 +- upstream とのマージ容易性: 新規コードはすべて新しいファイル / ディレクトリに + 置き、既存ファイルへの変更は追記のみ、かつ 1 行変更を 3 箇所に限定する。 + +## 非目的(v1) + +- タスクの並列実行。 +- Cloud / Modal 上のサンドボックス実行 + (ローカル worktree のみを対象とする)。 +- セッションをまたいだ LLM 判定。最終判定はシェルの verify コマンドとする。 +- PR の自動作成。(v2 で対応予定) + +## アーキテクチャ + +``` +Renderer Main process +──────── ──────────── +TodoButton (PresetsBar) TodoSupervisor (singleton) + └─ TodoModal ──► trpc todo.create ──────► createSession() + ├─ writes .superset/todo//goal.md + ├─ inserts DB row (queued) + └─ returns sessionId +TodoPanel enqueue / runQueue loop + ├─ trpc todo.subscribeState ◄─────────── state observable (per session) + ├─ embeds ◄──────── (paneId assigned by renderer) + ├─ Abort / Pause buttons ├─ spawnWorker(paneId) via + └─ Intervene input ──► trpc todo.sendKey ─┘ existing terminal.write + ├─ subscribe 
data:${paneId} + │ (idle timer + log capture) + ├─ runVerify() (child_process) + └─ update state / next iteration +``` + +Supervisor は **メインプロセス上で動く純粋な TypeScript** であり、 +2 つ目の Claude Code インスタンスではない。これが最も重要な単純化ポイントで、 +LLM 間通信は存在せず、「管理」役は決定論的な TS コードで担い、 +創造的な処理はすべてワーカー側に集約する。 + +## 実行ループ + +各セッションは状態遷移ごとに DB へ永続化する: + +``` +queued → preparing → running → verifying → done + │ │ + │ └──► running (fail, under budget) + │ │ + │ └──► escalated (futility) + └──► aborted +``` + +各イテレーションの流れ: + +1. Supervisor はワーカー用 PTY ペインの存在を確認する + (初回は renderer が `tabs.addTerminalPane` で作成し、 + `todo.attachPane` で `paneId` を登録する)。 +2. `goal.md`、現在の `state.json`、およびリトライ時は verify 失敗ログの末尾を + もとにプロンプトを組み立てる。 +3. Supervisor はそのプロンプトを `terminal.write` 経由で PTY に書き込む。 + ワーカー側では、対話モードの `claude` が既にペイン内で待機している。 +4. Supervisor は node-pty emitter の `data:${paneId}` イベントを購読する + (メインプロセスから + `getWorkspaceRuntimeRegistry().getDefault().terminal` で直接参照可能)。 + チャンクを受け取るたびに 5 秒のアイドルタイマーをリセットする。 +5. ストリームがしきい値時間だけアイドル状態になり、かつ + ターン完了ヒューリスティックを満たしたら、Supervisor は worktree 上で + `verifyCommand` を独立した child process として実行し、 + 終了コードとログ末尾を取得する。 +6. `exit 0` の場合は状態を `done` にし、判定結果を記録して通知を送る。 +7. 非 0 の場合は futile 判定 + (同じ failing test が N 回連続、または同じ diff が 2 回連続)を行い、 + 次イテレーションへ進むか、`escalated` にするかを決める。 +8. 
状態が変わるたびに Supervisor は `sessionId` をキーにした + `EventEmitter` へ通知し、それを trpc subscription 側が購読する。 + +### Stop hook ではなく idle 検知を使う理由 + +Stop hook の方がきれいだが、ワーカー起動コマンドへ +`--settings ` を差し込む必要があり、これはインストール済みの +Claude Code バイナリがそのフラグをサポートしているかに依存する。v1 では、 +Claude Code CLI の内部仕様と結合しないように idle 検知を使う。 +Stop hook 連携は v2 の拡張項目として、後述の `Unresolved` に記載する。 + +### 予算と futile ガード + +- `maxIterations`(デフォルト 10) +- `maxWallClockSec`(デフォルト 1800) +- `maxTurnsPerIteration` は強制しない + (対話モードのため)。wall-clock と iteration 上限を優先する。 +- Futility: verify が同じテスト名で 3 イテレーション連続失敗する、 + あるいは worktree diff が前回イテレーションと完全一致する場合。 +- 予算超過または futility 検知時は `escalated` とし、セッションは永続化しつつ、 + ワーカーペインはそのまま残してユーザーが引き継げるようにする。 + +## 介入 UX + +- PTY は通常のターミナルなので、`TerminalPane` を開いているユーザーは + 直接入力できる。Supervisor が入力を専有することはない。 +- `TodoPanel` でもワンクリックの `Send` 入力欄を提供し、 + ユーザーがターミナルにフォーカスを移さなくても + `terminal.write({paneId, data})` を実行できるようにする。 +- `Pause` ボタンはイテレーションスケジューラを停止するだけで、 + ワーカーの現在のターン自体は継続する。kill はしない。 +- `Abort` は PTY に `Ctrl-C`(`\x03`)を 2 回送ったうえで、 + 状態を `aborted` にする。 + +## UI サーフェス + +- **`TodoButton`**: `PresetsBar.tsx:488` の `WorkspaceRunButton` 左に置く + コンパクトなボタン。キュー中 + 実行中セッション数の小さなカウンターを表示する。 + クリックで `New TODO`、`Open panel`、最近のセッションを含むドロップダウンを開く。 +- **`TodoModal`**: フォーム項目は以下。 + - タイトル(必須) + - 説明(必須、複数行) + - ゴール / 受け入れ条件(必須、複数行) + - Verify コマンド(デフォルト: `bun test`) + - 予算: 最大イテレーション数(デフォルト 10)、 + wall-clock 分数(デフォルト 30) +- **`TodoPanel`**: 右側ドロワー。左にセッション一覧、右に詳細。 + 詳細にはゴール、フェーズ、イテレーション、残り予算、最新の判定結果、 + ワーカー用に埋め込まれた ``、および + Pause / Abort / Send コントロールを表示する。 + +## フォーク衝突面 + +### 新規ファイル(衝突リスクなし) + +``` +apps/desktop/plans/todo-agent-plan.md (this file) +apps/desktop/src/main/todo-agent/ + index.ts barrel + types.ts shared types + zod schemas + supervisor.ts singleton loop driver + session-store.ts in-memory session map + EventEmitter fan-out + worker-pty.ts thin wrapper around terminal.write / onData + verify-runner.ts child_process exec of verifyCommand + futility-detector.ts repeat-failure / diff-stall detection + 
prompt-builder.ts composes the claude prompt per iteration + trpc-router.ts tRPC router factory (createTodoAgentRouter) +packages/local-db/src/schema/todo-sessions.ts (new table) +apps/desktop/src/renderer/features/todo-agent/ + TodoButton/TodoButton.tsx + TodoButton/index.ts + TodoModal/TodoModal.tsx + TodoModal/index.ts + TodoPanel/TodoPanel.tsx + TodoPanel/index.ts + hooks/useTodoSession.ts + hooks/useTodoQueue.ts +``` + +### 変更する既存ファイル(最小限、追記のみ) + +1. `packages/local-db/src/schema/index.ts` および `schema.ts` + 1 行追加: `export * from "./todo-sessions";` +2. `apps/desktop/src/lib/trpc/routers/index.ts` + import 1 行 + router object に 1 行追加: + `todoAgent: createTodoAgentRouter()`. +3. `apps/desktop/src/renderer/screens/main/components/WorkspaceView/ContentView/components/PresetsBar/PresetsBar.tsx` + 既存の `` 描画直前の 1 行 + (488 行目付近)に + `` + を追加。 + +この 3 つの変更はいずれも 1 行単位で孤立しているため、 +upstream 側で多少の変更があっても衝突しにくい。 + +## データモデル + +```ts +// packages/local-db/src/schema/todo-sessions.ts (SQLite) +export const todoSessions = pgTable("todo_sessions", { + id: uuid().primaryKey().defaultRandom(), + organizationId: uuid("organization_id").notNull().references(() => organizations.id), + projectId: uuid("project_id").references(() => projects.id), + workspaceId: uuid("workspace_id").notNull().references(() => workspaces.id), + createdByUserId: uuid("created_by_user_id").references(() => users.id), + + title: text().notNull(), + description: text().notNull(), + goal: text().notNull(), + verifyCommand: text("verify_command").notNull(), + + // Budget + maxIterations: integer("max_iterations").notNull().default(10), + maxWallClockSec: integer("max_wall_clock_sec").notNull().default(1800), + + // State + status: text().notNull().default("queued"), // queued|preparing|running|verifying|done|failed|escalated|aborted + phase: text(), + iteration: integer().notNull().default(0), + attachedPaneId: text("attached_pane_id"), + + // Verdict + verdictPassed: boolean("verdict_passed"), + 
verdictReason: text("verdict_reason"), + verdictFailingTest: text("verdict_failing_test"), + + // Artifacts + artifactPath: text("artifact_path").notNull(), // .superset/todo// + + createdAt: timestamp("created_at").notNull().defaultNow(), + updatedAt: timestamp("updated_at").notNull().defaultNow(), + startedAt: timestamp("started_at"), + completedAt: timestamp("completed_at"), +}, (table) => [ + index("todo_sessions_workspace_idx").on(table.workspaceId), + index("todo_sessions_status_idx").on(table.status), +]); + +export type InsertTodoSession = typeof todoSessions.$inferInsert; +export type SelectTodoSession = typeof todoSessions.$inferSelect; +``` + +ユーザー側で `bunx drizzle-kit generate --name="add_todo_sessions"` を実行する。 +リポジトリポリシーに従い、こちらでは実行しない。 + +## tRPC サーフェス + +``` +todoAgent.create(input) → { sessionId } +todoAgent.list(workspaceId) → SelectTodoSession[] +todoAgent.get(sessionId) → SelectTodoSession +todoAgent.attachPane(sessionId, paneId) → void +todoAgent.pause(sessionId) → void +todoAgent.resume(sessionId) → void +todoAgent.abort(sessionId) → void +todoAgent.sendInput(sessionId, data) → void (passthrough to terminal.write) +todoAgent.subscribeState(sessionId) → observable +``` + +すべての subscription は `observable` ヘルパーを使い、 +`apps/desktop/AGENTS.md` に記載された trpc-electron の制約を満たす。 + +## 段階的な提供 + +**Phase 1(このブランチ)** +- DB テーブル + migration +- 単一タスク対応・キューなし・idle 検知ループ・child_process による verify を備えた + Supervisor の骨組み +- ライブペイン埋め込み付きの `TodoButton` + `TodoModal` + `TodoPanel` +- Pause / Abort / Send Input + +**Phase 2** +- キュー + (複数セッションの逐次実行) +- Futility 検知の強化 +- `--settings` を使った Stop hook 連携の任意対応 +- Issue URL の自動取り込み + (`gh issue view` → ゴールの事前入力) + +**Phase 3** +- `done` 時の PR draft 自動作成 +- 通知 +- 追加 worktree による並列実行 + +## 未解決事項 + +- インストール済みの Claude Code バイナリが、セッション単位の hook 注入用に + `--settings ` フラグをサポートしているかどうか。 + Phase 2 の確認項目とする。 +- `verifyCommand` をワーカー PTY 内で実行するべきか、 + 別 child process で実行するべきか。現行案では、 + verify 出力でユーザーに見えるターミナルを汚さないため、 + 別 child process 
を使う。verify 出力をインラインで見たい要望が強ければ再検討する。 +- クラウドワークスペース実行時に、artifact + (`.superset/todo//`)をどこへ永続化するか。 + v1 ではローカル限定のため対象外。 diff --git a/apps/desktop/src/lib/trpc/routers/index.ts b/apps/desktop/src/lib/trpc/routers/index.ts index 0911ffbb6e2..b4ccb83604e 100644 --- a/apps/desktop/src/lib/trpc/routers/index.ts +++ b/apps/desktop/src/lib/trpc/routers/index.ts @@ -1,5 +1,7 @@ import type { BrowserWindow } from "electron"; import type { WindowManager } from "main/lib/window-manager"; +// Fork-local: TODO autonomous agent feature. +import { createTodoAgentRouter } from "main/todo-agent"; import { router } from ".."; import { createAnalyticsRouter } from "./analytics"; import { createAuthRouter } from "./auth"; @@ -77,6 +79,7 @@ export const createAppRouter = ( tabTearoff: createTabTearoffRouter(wm), extensions: createExtensionsRouter(getWindow), vscodeExtensions: createVscodeExtensionsRouter(), + todoAgent: createTodoAgentRouter(), }); }; diff --git a/apps/desktop/src/main/todo-agent/enhance-text.ts b/apps/desktop/src/main/todo-agent/enhance-text.ts new file mode 100644 index 00000000000..cc24ad23d9c --- /dev/null +++ b/apps/desktop/src/main/todo-agent/enhance-text.ts @@ -0,0 +1,102 @@ +import { generateText, type LanguageModel } from "ai"; +import { + callSmallModel, + type SmallModelAttempt, +} from "lib/ai/call-small-model"; + +/** + * AI-rewrite helper for the TODO creation form. Takes a piece of user- + * written text (rough description or rough goal) and rewrites it into a + * clearer, LLM-friendly instruction. Uses the existing `callSmallModel` + * plumbing so credentials, provider fallback, and diagnostics all come + * for free — same path as the workspace auto-namer. + * + * The system prompts are deliberately kept short and concrete. They do + * NOT add length; they rewrite in place. 
+ */ + +export type TodoTextKind = "description" | "goal"; + +const INSTRUCTIONS: Record = { + description: [ + "あなたはユーザーが書いた雑な TODO の記述を、自律コーディングエージェントが理解しやすい明確な指示に書き換えるアシスタントです。", + "", + "次の観点で書き換えてください:", + "- 何をすべきかを具体的に", + "- 前提・対象ファイル・制約が推測できる範囲で明示", + "- 曖昧な表現(ちゃんと/きれいに/いい感じに 等)を避ける", + "- 元の意図は絶対に保つ。新しい要件を勝手に追加しない", + "- 過剰な装飾・前置き・解説を付けない", + "- 日本語で書く", + "- 1〜6 行程度に収める", + "- 出力は書き換え後のテキストのみ。引用符や見出しを付けない", + ].join("\n"), + goal: [ + "あなたはユーザーが書いた雑な TODO のゴールを、自律コーディングエージェントが完了判定に使える明確な受け入れ条件に書き換えるアシスタントです。", + "", + "次の観点で書き換えてください:", + "- 「〜ができている」「〜が動作している」「〜が存在する」など検証可能な形にする", + "- 複数ある場合は箇条書き(行頭 '- ')で列挙", + "- 曖昧な表現を避ける", + "- 元の意図を保つ", + "- 日本語で書く", + "- 合計で 1〜6 行程度に収める", + "- 出力は書き換え後のテキストのみ。引用符や見出しを付けない", + ].join("\n"), +}; + +export interface EnhanceTodoTextResult { + text: string | null; + attempts: SmallModelAttempt[]; +} + +export async function enhanceTodoText( + rawText: string, + kind: TodoTextKind, +): Promise { + const cleaned = rawText.trim(); + if (!cleaned) { + return { text: null, attempts: [] }; + } + + const system = INSTRUCTIONS[kind]; + + const { result, attempts } = await callSmallModel({ + invoke: async ({ model }) => { + const { text } = await generateText({ + model: model as LanguageModel, + system, + prompt: cleaned, + }); + const trimmed = text.trim(); + return trimmed.length > 0 ? trimmed : null; + }, + }); + + return { text: result ?? null, attempts }; +} + +/** + * Turn a failed `callSmallModel` attempt list into a user-facing error + * message in Japanese. Returns a generic fallback if no attempt carries + * a useful reason. + */ +export function describeEnhanceFailure(attempts: SmallModelAttempt[]): string { + for (let index = attempts.length - 1; index >= 0; index -= 1) { + const attempt = attempts[index]; + if (!attempt) continue; + if (attempt.outcome === "expired-credentials") { + return `${attempt.issue?.message ?? 
`${attempt.providerName} の認証が切れています`}。設定から再接続してください。`; + } + if (attempt.outcome === "failed") { + return `${attempt.providerName} での書き換えに失敗しました: ${attempt.issue?.message ?? attempt.reason ?? "unknown"}`; + } + if (attempt.outcome === "unsupported-credentials") { + return `${attempt.providerName} の認証種別が書き換えに対応していません。`; + } + } + if (attempts.every((a) => a.outcome === "missing-credentials")) { + return "AI 書き換えに使えるモデルアカウントが接続されていません。設定から Anthropic か OpenAI を接続してください。"; + } + return "AI 書き換えに失敗しました。"; +} diff --git a/apps/desktop/src/main/todo-agent/git-status.ts b/apps/desktop/src/main/todo-agent/git-status.ts new file mode 100644 index 00000000000..fb3cef7e805 --- /dev/null +++ b/apps/desktop/src/main/todo-agent/git-status.ts @@ -0,0 +1,207 @@ +import { execGitWithShellPath } from "lib/trpc/routers/workspaces/utils/git-client"; + +/** + * Git inspection helpers scoped to a TODO session. + * + * All operations are read-only and routed through `execGitWithShellPath` + * so shell PATH is resolved correctly (same helper the rest of the app's + * git plumbing uses). The session's `startHeadSha` column — captured by + * the supervisor the moment `runSession` begins — anchors "what this + * session produced" vs. "what was already there", so commits the user + * made before the session are never attributed to it. 
+ */ + +async function gitOut(args: string[], cwd: string): Promise<string> { + try { + const { stdout } = await execGitWithShellPath(args, { cwd }); + return stdout; + } catch { /* best-effort: any git failure is reported as empty output */ + return ""; + } +} + +export async function getCurrentHeadSha(cwd: string): Promise<string | null> { + const out = (await gitOut(["rev-parse", "HEAD"], cwd)).trim(); + return out || null; +} + +export interface SessionGitCommit { + sha: string; + shortSha: string; + subject: string; + authorName: string; + authorDate: string; +} + +export type SessionGitFileStage = "staged" | "unstaged" | "untracked"; + +export interface SessionGitFile { + path: string; + stage: SessionGitFileStage; + /** Raw git status letter — M / A / D / R / C / U / ? */ + code: string; +} + +export interface SessionGitSnapshot { + branch: string | null; + startHeadSha: string | null; + currentHeadSha: string | null; + commits: SessionGitCommit[]; + workingTree: SessionGitFile[]; + ahead: number; + behind: number; +} + +const COMMIT_DELIM = "\x00"; /* NUL: the field separator expected in git's output when parsing a log line */ +const COMMIT_FORMAT = ["%H", "%h", "%s", "%an", "%aI"].join("%x00"); /* %x00 makes git emit the NUL itself; an argv string cannot carry a raw NUL byte */ + +export async function getSessionGitSnapshot(params: { + cwd: string; + startHeadSha: string | null; +}): Promise<SessionGitSnapshot> { + const { cwd, startHeadSha } = params; + + const [branchOut, currentOut] = await Promise.all([ + gitOut(["rev-parse", "--abbrev-ref", "HEAD"], cwd), + gitOut(["rev-parse", "HEAD"], cwd), + ]); + const branch = branchOut.trim() || null; + const currentHeadSha = currentOut.trim() || null; + + // Commits produced since the session started. If start and current + // are the same (no new commits yet) this returns an empty list. 
+ let commits: SessionGitCommit[] = []; + if (startHeadSha && currentHeadSha && startHeadSha !== currentHeadSha) { + const logOut = await gitOut( + [ + "log", + `${startHeadSha}..${currentHeadSha}`, + `--format=${COMMIT_FORMAT}`, + ], + cwd, + ); + commits = logOut + .split("\n") + .filter((l) => l.length > 0) + .map((line) => { + const [sha, shortSha, subject, authorName, authorDate] = + line.split(COMMIT_DELIM); + return { + sha: sha ?? "", + shortSha: shortSha ?? "", + subject: subject ?? "", + authorName: authorName ?? "", + authorDate: authorDate ?? "", + }; + }); + } + + // Working tree state via porcelain v1 for stable parsing. + const statusOut = await gitOut( + ["status", "--porcelain=v1", "--untracked-files=all"], + cwd, + ); + const workingTree: SessionGitFile[] = []; + const seen = new Set(); + for (const line of statusOut.split("\n")) { + if (line.length < 3) continue; + const indexStatus = line[0] ?? " "; + const wtStatus = line[1] ?? " "; + const filePath = line.slice(3); + const key = `${filePath}|${indexStatus}${wtStatus}`; + if (seen.has(key)) continue; + seen.add(key); + if (indexStatus === "?" && wtStatus === "?") { + workingTree.push({ path: filePath, stage: "untracked", code: "?" }); + continue; + } + if (indexStatus !== " " && indexStatus !== "?") { + workingTree.push({ + path: filePath, + stage: "staged", + code: indexStatus, + }); + } + if (wtStatus !== " " && wtStatus !== "?") { + workingTree.push({ + path: filePath, + stage: "unstaged", + code: wtStatus, + }); + } + } + + // Ahead/behind relative to upstream, if configured. Failure is + // expected when no upstream is set, so swallow silently. 
+ let ahead = 0; + let behind = 0; + const rlOut = ( + await gitOut(["rev-list", "--left-right", "--count", "HEAD...@{u}"], cwd) + ).trim(); + if (rlOut) { + const parts = rlOut.split(/\s+/); + if (parts.length === 2) { + ahead = Number(parts[0]) || 0; + behind = Number(parts[1]) || 0; + } + } + + return { + branch, + startHeadSha, + currentHeadSha, + commits, + workingTree, + ahead, + behind, + }; +} + +export type SessionDiffScope = "session" | "staged" | "unstaged" | "commit"; + +export async function getSessionFileDiff(params: { + cwd: string; + startHeadSha: string | null; + path: string; + scope: SessionDiffScope; + commitSha?: string; +}): Promise<string> { + const { cwd, startHeadSha, path, scope, commitSha } = params; + const args: string[] = ["--no-pager", "diff", "--no-color"]; /* An empty path means "no file filter": git rejects "" as a pathspec, so the separator and path are omitted together in that case. */ const pathspec = path ? ["--", path] : []; + + switch (scope) { + case "session": + if (!startHeadSha) return ""; + args.push(`${startHeadSha}..HEAD`, ...pathspec); + break; + case "staged": + args.push("--cached", ...pathspec); + break; + case "unstaged": + args.push(...pathspec); + break; + case "commit": { + if (!commitSha) return ""; + // Whole-commit diff: `git show --format= <sha>` returns just + // the patch, no commit header. When the caller supplies a + // path we scope to that file via `-- <path>`; when the path + // is empty (UI selects a commit row, not a specific file), + // we must NOT append an empty pathspec or Git rejects it + // with "empty string is not a valid pathspec" and the diff + // silently disappears from the sidebar. 
+ const showArgs = [ + "--no-pager", + "show", + "--no-color", + "--format=", + commitSha, + ]; + if (path && path.length > 0) { + showArgs.push("--", path); + } + return gitOut(showArgs, cwd); + } + } + + return gitOut(args, cwd); +} diff --git a/apps/desktop/src/main/todo-agent/index.ts b/apps/desktop/src/main/todo-agent/index.ts new file mode 100644 index 00000000000..25f1a8a0265 --- /dev/null +++ b/apps/desktop/src/main/todo-agent/index.ts @@ -0,0 +1,5 @@ +export { getTodoSessionStore } from "./session-store"; +export { getTodoSupervisor } from "./supervisor"; +export type { TodoAgentRouter } from "./trpc-router"; +export { createTodoAgentRouter } from "./trpc-router"; +export * from "./types"; diff --git a/apps/desktop/src/main/todo-agent/session-store.ts b/apps/desktop/src/main/todo-agent/session-store.ts new file mode 100644 index 00000000000..29def23fd99 --- /dev/null +++ b/apps/desktop/src/main/todo-agent/session-store.ts @@ -0,0 +1,357 @@ +import { EventEmitter } from "node:events"; +import { existsSync, mkdirSync, readFileSync } from "node:fs"; +import { appendFile } from "node:fs/promises"; +import path from "node:path"; +import { + projects, + type SelectTodoSession, + todoSessions, + workspaces, + worktrees, +} from "@superset/local-db"; +import { desc, eq, inArray, isNull } from "drizzle-orm"; +import { localDb } from "main/lib/local-db"; +import type { + TodoSessionListEntry, + TodoSessionStateEvent, + TodoStreamEvent, + TodoStreamUpdate, +} from "./types"; + +export type { TodoSessionListEntry }; + +const STREAM_JSONL_FILE = "stream.jsonl"; + +/** + * Cap on the number of stream events we keep in memory per session. Enough + * to show "the whole current run" in the UI without letting an unbounded + * stream balloon process memory. Older events are dropped from the head. + */ +const STREAM_EVENT_BUFFER_CAP = 500; + +/** + * In-memory session bookkeeping + persistence helpers for the TODO agent. 
+ * + * All state transitions go through `updateSession` so we have exactly one + * place that writes to the DB and emits the state event consumed by the + * tRPC subscription. + */ +class TodoSessionStore { + private readonly emitter = new EventEmitter(); + /** In-memory per-session stream event buffer. Not persisted. */ + private readonly streamBuffers = new Map(); + /** + * Cached absolute artifact path per sessionId. The supervisor + * primes this at the start of each run via `setArtifactPathCache` + * so append-hot stream writes do not need to hit SQLite on every + * event. + */ + private readonly artifactPathCache = new Map(); + /** + * Per-session serialized append chain. `appendFile` from + * node:fs/promises is async, and bursts of stream events can race + * and write out-of-order. We sequence them per session via a + * promise chain — cheap and avoids reordering the JSONL. + */ + private readonly persistQueues = new Map>(); + + constructor() { + this.emitter.setMaxListeners(0); + this.rehydrateStrandedSessions(); + } + + setArtifactPathCache(sessionId: string, artifactPath: string | null): void { + if (artifactPath?.startsWith("/")) { + this.artifactPathCache.set(sessionId, artifactPath); + // Make sure the directory exists once, up-front, so the async + // appendFile calls below never race on mkdir. + try { + mkdirSync(artifactPath, { recursive: true }); + } catch (error) { + console.warn("[todo-agent] artifact mkdir failed", error); + } + } else { + this.artifactPathCache.delete(sessionId); + } + } + + appendStreamEvents(sessionId: string, events: TodoStreamEvent[]): void { + if (events.length === 0) return; + const buffer = this.streamBuffers.get(sessionId) ?? []; + buffer.push(...events); + // Drop from the head if we are over the cap so the tail (most + // recent activity) is always preserved. 
+ if (buffer.length > STREAM_EVENT_BUFFER_CAP) { + buffer.splice(0, buffer.length - STREAM_EVENT_BUFFER_CAP); + } + this.streamBuffers.set(sessionId, buffer); + + // Persist every event to disk so that sessions stay reviewable + // across app restarts and after the in-memory cap evicts them. + // The file lives inside the per-session artifact dir we already + // created via `prepareArtifacts`, so cleanup is automatic when + // the session (and its artifact dir) are deleted. + this.persistStreamEvents(sessionId, events); + + const update: TodoStreamUpdate = { sessionId, events }; + this.emitter.emit(`stream:${sessionId}`, update); + } + + getStreamEvents(sessionId: string): TodoStreamEvent[] { + const inMemory = this.streamBuffers.get(sessionId); + if (inMemory && inMemory.length > 0) return [...inMemory]; + // Fall back to the JSONL file — this is how we hydrate a past + // session whose in-memory buffer was cleared (either by app + // restart or by the eviction cap). + return this.loadStreamEventsFromDisk(sessionId); + } + + clearStreamEvents(sessionId: string): void { + this.streamBuffers.delete(sessionId); + } + + private persistStreamEvents( + sessionId: string, + events: TodoStreamEvent[], + ): void { + // Fast-path: use the cached absolute path the supervisor primed + // when the run started. Falls back to a DB read only when no + // cache entry exists (e.g. a historical session being replayed + // outside of a run). + let dir = this.artifactPathCache.get(sessionId); + if (!dir) { + const session = this.get(sessionId); + dir = session?.artifactPath; + if (dir?.startsWith("/")) { + this.artifactPathCache.set(sessionId, dir); + } + } + if (!dir || !dir.startsWith("/")) return; + const filePath = path.join(dir, STREAM_JSONL_FILE); + const body = `${events.map((e) => JSON.stringify(e)).join("\n")}\n`; + + // Chain async appends so bursty event streams stay ordered in + // the JSONL file and main process is not blocked on fs I/O. 
+ const previous = this.persistQueues.get(sessionId) ?? Promise.resolve(); + const nextTask = previous + .catch(() => {}) + .then(() => appendFile(filePath, body, "utf8")) + .catch((error) => { + console.warn("[todo-agent] stream persist failed", error); + }); + this.persistQueues.set(sessionId, nextTask); + } + + /** + * On app startup, any session that was mid-run when the previous + * process died will still have a non-terminal status + * (`preparing` / `running` / `verifying`) in the DB. The + * in-memory supervisor is obviously gone, so those rows would + * otherwise render as "running" forever in the Agent Manager + * with no way to start, stop, or re-run them. Flip them to + * `failed` once with a clear reason so the user can immediately + * delete or re-run from the UI. + */ + private rehydrateStrandedSessions(): void { + try { + const stranded = localDb + .update(todoSessions) + .set({ + status: "failed", + phase: "failed", + verdictPassed: false, + verdictReason: + "前回の実行が中断されました(アプリ再起動)。再実行するか削除してください。", + completedAt: Date.now(), + updatedAt: Date.now(), + }) + .where( + inArray(todoSessions.status, ["preparing", "running", "verifying"]), + ) + .returning() + .all(); + if (stranded.length > 0) { + console.log( + `[todo-agent] rehydrated ${stranded.length} stranded session(s)`, + ); + } + } catch (error) { + console.warn("[todo-agent] rehydrate on startup failed", error); + } + } + + private loadStreamEventsFromDisk(sessionId: string): TodoStreamEvent[] { + try { + const session = this.get(sessionId); + const dir = session?.artifactPath; + if (!dir || !dir.startsWith("/")) return []; + const filePath = path.join(dir, STREAM_JSONL_FILE); + if (!existsSync(filePath)) return []; + const text = readFileSync(filePath, "utf8"); + const lines = text.split("\n").filter((l) => l.length > 0); + const events: TodoStreamEvent[] = []; + for (const line of lines) { + try { + const parsed = JSON.parse(line) as TodoStreamEvent; + if ( + parsed && + typeof parsed === 
"object" && + typeof parsed.id === "string" && + typeof parsed.kind === "string" + ) { + events.push(parsed); + } + } catch { + // Skip malformed line. + } + } + return events; + } catch (error) { + console.warn("[todo-agent] stream load failed", error); + return []; + } + } + + subscribeStream( + sessionId: string, + handler: (update: TodoStreamUpdate) => void, + ): () => void { + const key = `stream:${sessionId}`; + this.emitter.on(key, handler); + return () => { + this.emitter.off(key, handler); + }; + } + + insert( + row: Omit & { + id?: string; + }, + ): SelectTodoSession { + const inserted = localDb.insert(todoSessions).values(row).returning().get(); + this.emit(inserted); + return inserted; + } + + get(sessionId: string): SelectTodoSession | undefined { + return localDb + .select() + .from(todoSessions) + .where(eq(todoSessions.id, sessionId)) + .get(); + } + + listForWorkspace(workspaceId: string): SelectTodoSession[] { + return localDb + .select() + .from(todoSessions) + .where(eq(todoSessions.workspaceId, workspaceId)) + .orderBy(desc(todoSessions.createdAt)) + .all(); + } + + /** + * Cross-workspace list used by the Agent-Manager-style view. Joins in + * workspace + project names so the manager can group and label rows + * without issuing N extra queries. Deleted workspaces + * (`deletingAt IS NOT NULL`) are filtered out. + */ + listAll(): TodoSessionListEntry[] { + const rows = localDb + .select({ + session: todoSessions, + workspaceName: workspaces.name, + workspaceBranch: workspaces.branch, + workspaceDeletingAt: workspaces.deletingAt, + projectName: projects.name, + }) + .from(todoSessions) + .leftJoin(workspaces, eq(workspaces.id, todoSessions.workspaceId)) + .leftJoin(projects, eq(projects.id, workspaces.projectId)) + .where(isNull(workspaces.deletingAt)) + .orderBy(desc(todoSessions.createdAt)) + .all(); + return rows.map((row) => ({ + ...row.session, + workspaceName: row.workspaceName ?? null, + workspaceBranch: row.workspaceBranch ?? 
null, + projectName: row.projectName ?? null, + })); + } + + update( + sessionId: string, + patch: Partial<SelectTodoSession>, + ): SelectTodoSession | undefined { + const next = { + ...patch, + updatedAt: Date.now(), + }; + const updated = localDb + .update(todoSessions) + .set(next) + .where(eq(todoSessions.id, sessionId)) + .returning() + .get(); + if (updated) this.emit(updated); + return updated; + } + + remove(sessionId: string): boolean { + const result = localDb + .delete(todoSessions) + .where(eq(todoSessions.id, sessionId)) + .run(); + /* Drop every per-session in-memory entry, not only the stream buffer, so a deleted session leaves nothing behind in the cache and queue maps. */ this.clearStreamEvents(sessionId); this.artifactPathCache.delete(sessionId); this.persistQueues.delete(sessionId); + return result.changes > 0; + } + + subscribe( + sessionId: string, + handler: (event: TodoSessionStateEvent) => void, + ): () => void { + const key = `session:${sessionId}`; + this.emitter.on(key, handler); + return () => { + this.emitter.off(key, handler); + }; + } + + private emit(session: SelectTodoSession): void { + const event: TodoSessionStateEvent = { + sessionId: session.id, + session, + }; + this.emitter.emit(`session:${session.id}`, event); + } +} + +let singleton: TodoSessionStore | undefined; + +export function getTodoSessionStore(): TodoSessionStore { + if (!singleton) singleton = new TodoSessionStore(); + return singleton; +} + +/** + * Resolve the absolute filesystem path a TODO session should run in for a + * given workspace. For `type="worktree"` workspaces this is the worktree + * path; for `type="branch"` workspaces there is no worktree row and we + * fall back to the project's `mainRepoPath`, matching the resolution + * strategy used by the existing terminal runtime in + * `workspace-terminal-context.ts`. Returns undefined only when the + * workspace does not exist. 
+ */ +export function resolveWorktreePath(workspaceId: string): string | undefined { + const row = localDb + .select({ + worktreePath: worktrees.path, + mainRepoPath: projects.mainRepoPath, + }) + .from(workspaces) + .leftJoin(projects, eq(projects.id, workspaces.projectId)) + .leftJoin(worktrees, eq(worktrees.id, workspaces.worktreeId)) + .where(eq(workspaces.id, workspaceId)) + .get(); + return row?.worktreePath ?? row?.mainRepoPath ?? undefined; +} diff --git a/apps/desktop/src/main/todo-agent/supervisor.ts b/apps/desktop/src/main/todo-agent/supervisor.ts new file mode 100644 index 00000000000..eb7d866e7e2 --- /dev/null +++ b/apps/desktop/src/main/todo-agent/supervisor.ts @@ -0,0 +1,1098 @@ +import { type ChildProcess, spawn } from "node:child_process"; +import { createHash, randomUUID } from "node:crypto"; +import { mkdirSync, writeFileSync } from "node:fs"; +import path from "node:path"; +import type { SelectTodoSession } from "@superset/local-db"; +import { getCurrentHeadSha } from "./git-status"; +import { getTodoSessionStore, resolveWorktreePath } from "./session-store"; +import type { TodoStreamEventKind } from "./types"; +import { TODO_ARTIFACT_SUBDIR } from "./types"; + +/** + * Headless Claude Code driver for TODO autonomous sessions. + * + * The previous iteration drove interactive Claude Code through a real PTY + * and tried to detect turn completion with an idle heuristic. That was + * fundamentally unreliable (long-thinking Claude looked identical to a + * dead claude), and the PTY leaked into the workspace tab bar. This + * rewrite replaces the PTY with `claude -p --output-format stream-json`: + * + * - The child process is spawned by the main process (no PTY, no tab + * bar involvement, no hidden-tab hacks). + * - Completion is **process exit**. No idle heuristic. No guessing. 
+ * - The `result` NDJSON event carries `result` (the final assistant + * text), `session_id`, `total_cost_usd`, and `num_turns`, which are + * stored on the DB row so the Manager can show a real verdict and + * timing information. + * - Retry iterations use `--resume ` so the same + * conversation state is preserved across verify failures. + * - Per-turn stream events are appended to an in-memory ring buffer + * and fanned out over a tRPC subscription so the Manager detail + * pane shows a live, chat-like view of the worker's activity. + * + * `--bare` is deliberately NOT passed. The `--bare` flag forces + * `ANTHROPIC_API_KEY` and explicitly refuses OAuth/keychain reads, + * which would break users authenticated via Claude Max. We still gain + * reproducibility because we own every argument and the CLAUDE.md + * discovery just adds project context, not hooks we do not want. + */ +interface ActiveRun { + sessionId: string; + abortController: AbortController; + lastFailingTest?: string; + consecutiveSameFailure: number; + startedAt: number; + currentChild: ChildProcess | null; +} + +class TodoSupervisor { + private active: ActiveRun | undefined; + private readonly queue: string[] = []; + + /** + * Pre-compute the artifact directory path for a not-yet-inserted + * session. Called from the `create` mutation BEFORE the row is + * written so the DB insert can land the final absolute path in one + * shot — no more two-step `PENDING` → update dance, no more + * half-written rows left behind by a crash between the two steps. + */ + computeArtifactPath(params: { + sessionId: string; + workspaceId: string; + }): string { + const worktreePath = resolveWorktreePath(params.workspaceId); + if (!worktreePath) { + throw new Error( + `todo-agent: workspace ${params.workspaceId} has no resolvable path`, + ); + } + return path.join(worktreePath, TODO_ARTIFACT_SUBDIR, params.sessionId); + } + + /** + * Materialize the artifact directory and write the initial goal.md. 
+ * Called right after insert. Idempotent — safe to call on rerun. + */ + prepareArtifacts(session: SelectTodoSession): string { + const dir = session.artifactPath; + mkdirSync(dir, { recursive: true }); + writeFileSync(path.join(dir, "goal.md"), renderGoalDoc(session), "utf8"); + return dir; + } + + async start(sessionId: string): Promise { + if (this.active) { + if (!this.queue.includes(sessionId)) this.queue.push(sessionId); + return; + } + await this.runSession(sessionId); + while (this.queue.length > 0) { + const next = this.queue.shift(); + if (!next) continue; + // A session can be aborted / deleted / rerun while still + // waiting in the queue. Re-check its latest persisted status + // before actually running it so we never revive an already + // terminal session into execution. + const latest = getTodoSessionStore().get(next); + if (!latest) continue; + if ( + latest.status === "aborted" || + latest.status === "failed" || + latest.status === "done" || + latest.status === "escalated" + ) { + continue; + } + await this.runSession(next); + } + } + + abort(sessionId: string): void { + const store = getTodoSessionStore(); + // If the session is still waiting in the pending queue, drop it + // from there so the drain loop does not silently revive it once + // the active run finishes. + const queueIdx = this.queue.indexOf(sessionId); + if (queueIdx !== -1) { + this.queue.splice(queueIdx, 1); + } + if (this.active?.sessionId === sessionId) { + this.active.abortController.abort(); + // Send SIGINT first (clean shutdown), then SIGKILL as a safety + // net via a short timer so we never leak a runaway child. 
+ const child = this.active.currentChild; + if (child && !child.killed) { + try { + child.kill("SIGINT"); + } catch { + // ignore + } + setTimeout(() => { + if (child && !child.killed) { + try { + child.kill("SIGKILL"); + } catch { + // ignore + } + } + }, 1500); + } + } + const session = store.get(sessionId); + if (!session) return; + if ( + session.status !== "done" && + session.status !== "failed" && + session.status !== "escalated" && + session.status !== "aborted" + ) { + store.update(sessionId, { + status: "aborted", + phase: "aborted", + completedAt: Date.now(), + }); + } + } + + /** + * Queue a free-form user intervention that will be prepended to the + * next turn's prompt. In the headless architecture we cannot inject + * mid-stream, so interventions land at the next turn boundary. + */ + queueIntervention(sessionId: string, data: string): void { + const store = getTodoSessionStore(); + const existing = store.get(sessionId); + if (!existing) return; + const previous = existing.pendingIntervention?.trim(); + const next = [previous, data.trim()].filter(Boolean).join("\n\n"); + store.update(sessionId, { pendingIntervention: next }); + } + + // ---- internals ---- + + private async runSession(sessionId: string): Promise { + const store = getTodoSessionStore(); + const session0 = store.get(sessionId); + if (!session0) return; + + // Fresh in-memory buffer for this run. Old events from previous + // runs of the same session are cleared so the UI sees just the + // current attempt. + store.clearStreamEvents(sessionId); + // Prime the artifact-path cache so the hot stream-persist path + // does not need to do a synchronous SQLite read per event. 
+ store.setArtifactPathCache(sessionId, session0.artifactPath); + + const ac = new AbortController(); + const run: ActiveRun = { + sessionId, + abortController: ac, + consecutiveSameFailure: 0, + startedAt: Date.now(), + currentChild: null, + }; + this.active = run; + + try { + appendSetupEvent( + sessionId, + "セットアップ", + "ワークスペースを解決しています…", + ); + const worktreePath = resolveWorktreePath(session0.workspaceId); + // Capture the git HEAD at session start so the Manager's right + // sidebar can show exactly what this session produced via + // `git log ..HEAD` — user commits made before + // the session are excluded from attribution. + if (worktreePath) { + appendSetupEvent(sessionId, "worktree", worktreePath); + } + const startHeadSha = worktreePath + ? await getCurrentHeadSha(worktreePath) + : null; + if (startHeadSha) { + appendSetupEvent( + sessionId, + "開始時 HEAD", + `${startHeadSha.slice(0, 12)}`, + ); + } + if (session0.verifyCommand) { + appendSetupEvent(sessionId, "verify", session0.verifyCommand); + } else { + appendSetupEvent(sessionId, "モード", "単発タスク(外部 verify なし)"); + } + appendSetupEvent( + sessionId, + "予算", + `${session0.maxIterations} iter · ${Math.round(session0.maxWallClockSec / 60)} 分`, + ); + appendSetupEvent( + sessionId, + "Claude", + "claude -p --output-format stream-json を起動します", + ); + + store.update(sessionId, { + status: "running", + phase: "running", + startedAt: Date.now(), + completedAt: null, + verdictPassed: null, + verdictReason: null, + verdictFailingTest: null, + finalAssistantText: null, + claudeSessionId: null, + totalCostUsd: null, + totalNumTurns: null, + iteration: 0, + startHeadSha, + }); + + if (!worktreePath) { + store.update(sessionId, { + status: "failed", + phase: "failed", + verdictReason: + "ワークスペースのパスを解決できませんでした(worktree も mainRepoPath も見つからない)", + completedAt: Date.now(), + }); + return; + } + + let claudeSessionId: string | null = null; + let lastAssistantText: string | null = null; + let aggregatedCostUsd = 0; + let 
aggregatedNumTurns = 0; + let iteration = 0; + + while (iteration < session0.maxIterations) { + if (ac.signal.aborted) break; + if (Date.now() - run.startedAt > session0.maxWallClockSec * 1000) { + store.update(sessionId, { + status: "escalated", + phase: "escalated", + verdictReason: "wall-clock 予算を使い切りました", + finalAssistantText: lastAssistantText, + claudeSessionId, + totalCostUsd: aggregatedCostUsd || null, + totalNumTurns: aggregatedNumTurns || null, + completedAt: Date.now(), + }); + return; + } + + iteration += 1; + store.update(sessionId, { + iteration, + phase: "running", + }); + + // Read-then-clear pending intervention at the turn boundary + // so user-queued steering actually reaches Claude. + const liveSession = store.get(sessionId); + const pendingIntervention = liveSession?.pendingIntervention ?? null; + if (pendingIntervention) { + store.update(sessionId, { pendingIntervention: null }); + } + + const currentSession = store.get(sessionId); + if (!currentSession) return; + + const prompt = buildIterationPrompt({ + session: currentSession, + iteration, + previousVerdictReason: currentSession.verdictReason ?? null, + intervention: pendingIntervention, + }); + + appendUserEvent(sessionId, iteration, prompt); + + const turnResult = await this.runClaudeTurn({ + sessionId, + iteration, + cwd: worktreePath, + prompt, + resumeSessionId: claudeSessionId, + customSystemPrompt: currentSession.customSystemPrompt ?? 
null, + signal: ac.signal, + onChild: (child) => { + run.currentChild = child; + }, + }); + run.currentChild = null; + + if (ac.signal.aborted) return; + + if (turnResult.error && !turnResult.result) { + store.update(sessionId, { + status: "failed", + phase: "failed", + verdictReason: turnResult.error, + finalAssistantText: lastAssistantText, + claudeSessionId, + totalCostUsd: aggregatedCostUsd || null, + totalNumTurns: aggregatedNumTurns || null, + completedAt: Date.now(), + }); + return; + } + + if (turnResult.sessionId) { + claudeSessionId = turnResult.sessionId; + } + if (turnResult.result) { + lastAssistantText = turnResult.result; + aggregatedCostUsd += turnResult.costUsd ?? 0; + aggregatedNumTurns += turnResult.numTurns ?? 0; + store.update(sessionId, { + claudeSessionId, + finalAssistantText: lastAssistantText, + totalCostUsd: aggregatedCostUsd || null, + totalNumTurns: aggregatedNumTurns || null, + }); + } + + // No verify → single-turn mode. Claude is done, we are done. + if (!currentSession.verifyCommand) { + store.update(sessionId, { + status: "done", + phase: "done", + verdictPassed: true, + verdictReason: lastAssistantText, + finalAssistantText: lastAssistantText, + claudeSessionId, + totalCostUsd: aggregatedCostUsd || null, + totalNumTurns: aggregatedNumTurns || null, + completedAt: Date.now(), + }); + return; + } + + store.update(sessionId, { phase: "verifying" }); + const verdict = await runVerify( + currentSession.verifyCommand, + worktreePath, + ac.signal, + ); + // If the user aborted while verify was running, bail out + // BEFORE we write any verdict state. Otherwise the aborted + // session would be tainted with "verify failed: AbortError…" + // even though verify was never allowed to finish. + if (ac.signal.aborted) return; + appendVerifyEvent(sessionId, iteration, verdict); + + if (verdict.passed) { + store.update(sessionId, { + status: "done", + phase: "done", + verdictPassed: true, + verdictReason: + lastAssistantText ?? 
"verify コマンドが exit 0 で完了しました", + finalAssistantText: lastAssistantText, + claudeSessionId, + totalCostUsd: aggregatedCostUsd || null, + totalNumTurns: aggregatedNumTurns || null, + completedAt: Date.now(), + }); + return; + } + + // Futility: same failing test 3 iterations in a row → escalate + if ( + verdict.failingTest && + verdict.failingTest === run.lastFailingTest + ) { + run.consecutiveSameFailure += 1; + } else { + run.consecutiveSameFailure = 1; + run.lastFailingTest = verdict.failingTest; + } + if (run.consecutiveSameFailure >= 3) { + store.update(sessionId, { + status: "escalated", + phase: "escalated", + verdictPassed: false, + verdictReason: `futility: ${ + verdict.failingTest ?? "同一失敗" + } が ${run.consecutiveSameFailure} 回連続で再現しました`, + verdictFailingTest: verdict.failingTest, + finalAssistantText: lastAssistantText, + claudeSessionId, + totalCostUsd: aggregatedCostUsd || null, + totalNumTurns: aggregatedNumTurns || null, + completedAt: Date.now(), + }); + return; + } + + store.update(sessionId, { + verdictPassed: false, + verdictReason: tailForReason(verdict.log), + verdictFailingTest: verdict.failingTest, + }); + } + + // Only write the "iteration budget exhausted" verdict if we + // left the loop cleanly. If the user aborted, `abort()` has + // already written `status: "aborted"` and we must not + // overwrite it. Without this guard, a race between the abort + // signal and the final DB write mislabels aborted sessions + // as escalated with a wrong reason. 
+ if (!ac.signal.aborted) { + store.update(sessionId, { + status: "escalated", + phase: "escalated", + verdictReason: "iteration 予算を使い切りました", + finalAssistantText: lastAssistantText, + claudeSessionId, + totalCostUsd: aggregatedCostUsd || null, + totalNumTurns: aggregatedNumTurns || null, + completedAt: Date.now(), + }); + } + } finally { + this.active = undefined; + } + } + + private runClaudeTurn(params: { + sessionId: string; + iteration: number; + cwd: string; + prompt: string; + resumeSessionId: string | null; + customSystemPrompt: string | null; + signal: AbortSignal; + onChild: (child: ChildProcess) => void; + }): Promise<{ + result: string | null; + sessionId: string | null; + costUsd: number | null; + numTurns: number | null; + error: string | null; + }> { + return new Promise((resolve) => { + const args = [ + "-p", + "--output-format", + "stream-json", + "--verbose", + "--include-partial-messages", + // `bypassPermissions` is required for truly unattended + // headless runs. `acceptEdits` auto-approves Edit/Write + // but still prompts for Bash tool calls; in `-p` mode + // there is nobody to grant that approval, so the child + // would hang forever waiting for a prompt that never + // comes, leaving the session stuck in `running` state. + // TODO agent is a deliberate-use feature where the user + // already opted into full autonomy, so bypassing all + // permission checks is the right default here. + "--permission-mode", + "bypassPermissions", + ]; + if (params.customSystemPrompt) { + args.push("--append-system-prompt", params.customSystemPrompt); + } + if (params.resumeSessionId) { + args.push("--resume", params.resumeSessionId); + } + args.push(params.prompt); + + let child: ChildProcess; + try { + child = spawn("claude", args, { + cwd: params.cwd, + env: process.env, + }); + } catch (error) { + resolve({ + result: null, + sessionId: null, + costUsd: null, + numTurns: null, + error: + error instanceof Error + ? 
`claude を起動できませんでした: ${error.message}` + : "claude を起動できませんでした", + }); + return; + } + + params.onChild(child); + + let claudeSessionId: string | null = null; + let resultText: string | null = null; + let costUsd: number | null = null; + let numTurns: number | null = null; + let errorText: string | null = null; + let stdoutBuffer = ""; + let stderrBuffer = ""; + let settled = false; + + const onAbort = () => { + try { + child.kill("SIGINT"); + } catch { + // ignore + } + }; + params.signal.addEventListener("abort", onAbort); + + // Single-shot settlement. `child.on("error", ...)` can fire + // WITHOUT a subsequent `close` (e.g. ENOENT when the claude + // binary is missing from PATH), and without this guard the + // outer promise would hang forever and the session would get + // stuck in `running`. Both the error and close handlers now + // funnel through this helper. + const settle = () => { + if (settled) return; + settled = true; + params.signal.removeEventListener("abort", onAbort); + if (stdoutBuffer.trim().length > 0) { + handleLine(stdoutBuffer.trim()); + stdoutBuffer = ""; + } + resolve({ + result: resultText, + sessionId: claudeSessionId, + costUsd, + numTurns, + error: errorText, + }); + }; + + const drainLines = (chunk: string) => { + stdoutBuffer += chunk; + let newlineIdx = stdoutBuffer.indexOf("\n"); + while (newlineIdx !== -1) { + const line = stdoutBuffer.slice(0, newlineIdx).trim(); + stdoutBuffer = stdoutBuffer.slice(newlineIdx + 1); + if (line.length > 0) { + handleLine(line); + } + newlineIdx = stdoutBuffer.indexOf("\n"); + } + }; + + const handleLine = (line: string) => { + let payload: unknown; + try { + payload = JSON.parse(line); + } catch { + appendRawEvent( + params.sessionId, + params.iteration, + "raw", + "raw", + line.slice(0, 600), + ); + return; + } + const parsed = classifyStreamJson(payload); + if (parsed.sessionId && !claudeSessionId) { + claudeSessionId = parsed.sessionId; + } + if (parsed.resultText) { + resultText = 
parsed.resultText; + } + if (parsed.costUsd != null) { + costUsd = parsed.costUsd; + } + if (parsed.numTurns != null) { + numTurns = parsed.numTurns; + } + if (parsed.event) { + getTodoSessionStore().appendStreamEvents(params.sessionId, [ + { + id: randomUUID(), + ts: Date.now(), + iteration: params.iteration, + kind: parsed.event.kind, + label: parsed.event.label, + text: parsed.event.text, + }, + ]); + } + }; + + child.stdout?.setEncoding("utf8"); + child.stdout?.on("data", (chunk: string) => { + drainLines(chunk); + }); + child.stderr?.setEncoding("utf8"); + child.stderr?.on("data", (chunk: string) => { + stderrBuffer += chunk; + if (stderrBuffer.length > 16_000) { + stderrBuffer = stderrBuffer.slice(-16_000); + } + }); + + child.on("error", (err) => { + // Spawn failures (ENOENT, EACCES) reach us via this event, + // often WITHOUT a follow-up `close`. Settle eagerly. + if (!errorText) { + errorText = `claude プロセスエラー: ${err.message}`; + } + settle(); + }); + child.on("close", (code) => { + if (code !== 0 && !resultText && !errorText) { + const tail = stderrBuffer.trim().split("\n").slice(-6).join("\n"); + errorText = `claude が exit code ${code} で終了しました${ + tail ? 
/**
 * Render the markdown body of a session's `goal.md`.
 *
 * The document always carries the title, the task description and the
 * acceptance goal (a placeholder sentence when no goal was given). It then
 * ends with either the verify command and budget, or a note that the session
 * is a single-shot task without external verification.
 *
 * @param session - session row to describe.
 * @returns the document text, lines joined with "\n".
 */
function renderGoalDoc(session: SelectTodoSession): string {
	// `||` on purpose: an empty or whitespace-only goal falls back too.
	const acceptance =
		session.goal?.trim() ||
		"(未指定。上記『やって欲しいこと』が完了した時点で完了とみなす)";

	const header: string[] = [
		`# TODO: ${session.title}`,
		"",
		"## やって欲しいこと",
		session.description,
		"",
		"## ゴール(受け入れ条件)",
		acceptance,
		"",
	];

	const footer: string[] = session.verifyCommand
		? [
				"## Verify コマンド",
				"```sh",
				session.verifyCommand,
				"```",
				"",
				`予算: ${session.maxIterations} イテレーション / ${session.maxWallClockSec} 秒`,
				"",
			]
		: [
				"## モード",
				"単発タスク。外部 verify は行いません。ゴールを達成したと判断したらターンを終えて停止してください。",
				"",
			];

	return [...header, ...footer].join("\n");
}
/**
 * Condense a verify log into the short tail kept as the verdict reason:
 * the last 20 lines of the trimmed log, further limited to its final 2000
 * characters.
 *
 * @param log - full combined output of the verify command.
 * @returns the shortened tail.
 */
function tailForReason(log: string): string {
	const lastLines = log.trim().split("\n").slice(-20);
	const joined = lastLines.join("\n");
	if (joined.length <= 2000) {
		return joined;
	}
	return joined.slice(-2000);
}
/**
 * Line shapes that common test runners print for a failing test. Capture
 * group 1 is the test identifier. Order matters: the first pattern that
 * matches a line wins.
 */
const FAILING_TEST_PATTERNS: readonly RegExp[] = [
	/^\s*\(fail\)\s+(.+?)(?:\s+\[\d.*)?$/i,
	/^\s*❯\s+(.+?)(?:\s+\d+ms)?$/,
	/^\s*FAIL\s+(.+?)(?:\s+>\s+.+)?$/,
	/^\s*✕\s+(.+?)(?:\s+\(\d+\s*ms\))?$/,
	/^\s*×\s+(.+?)(?:\s+\(\d+\s*ms\))?$/,
	/^\s*✗\s+(.+?)(?:\s+\(\d+\s*ms\))?$/,
	/^\s*not ok \d+\s*-\s*(.+)$/,
	/^\s*\d+\)\s+(?:\[[^\]]+\]\s+)?[›»>]\s+(.+)$/,
];

/**
 * Best-effort guess at an identifier for the first failing test in a verify
 * log, so consecutive iterations can be compared for futility detection.
 *
 * Terminal escape sequences are stripped first, then the log is scanned line
 * by line against `FAILING_TEST_PATTERNS`. If nothing matches, the first line
 * containing a standalone `Error` or `Assertion` word followed by a colon is
 * used instead.
 *
 * @param log - combined stdout/stderr of the verify command.
 * @returns a normalized identifier, or `undefined` when nothing recognisable
 *   was found.
 */
function guessFailingTest(log: string): string | undefined {
	// Strip every CSI sequence (colors, erase-line, cursor moves), not only
	// SGR `…m`; a leftover escape at line start defeats the `^\s*` anchors.
	// biome-ignore lint/suspicious/noControlCharactersInRegex: stripping real ANSI escapes from verify output is the whole point
	const plain = log.replace(/\u001B\[[0-?]*[ -\/]*[@-~]/g, "");
	// Accept CRLF as well as LF: a trailing "\r" cannot be consumed by `.`
	// and would stop `$` from matching in every pattern.
	const lines = plain.split(/\r?\n/);
	for (const line of lines) {
		for (const re of FAILING_TEST_PATTERNS) {
			const m = line.match(re);
			if (m?.[1]) return normalizeTestId(m[1]);
		}
	}
	const errorLine = lines.find((l) => /\b(Error|Assertion)\b.*:/.test(l));
	if (errorLine) return normalizeTestId(errorLine.trim());
	return undefined;
}

/**
 * Remove run-specific noise from a test identifier so the same failure
 * compares equal across iterations: trailing durations, hex addresses and
 * the `: expected …` detail. The result is capped at 240 characters.
 */
function normalizeTestId(raw: string): string {
	return raw
		.trim()
		.replace(/\s*\(\d+\s*ms\)\s*$/, "")
		.replace(/\s*\[\d+(?:\.\d+)?\s*m?s\]\s*$/, "")
		.replace(/@0x[0-9a-f]+/gi, "@0x?")
		.replace(/:\s*expected.*$/i, "")
		.slice(0, 240);
}
+ */ +function classifyStreamJson(payload: unknown): ClassifiedLine { + const empty: ClassifiedLine = { + sessionId: null, + resultText: null, + costUsd: null, + numTurns: null, + event: null, + }; + if (typeof payload !== "object" || payload === null) return empty; + const rec = payload as Record; + const type = typeof rec.type === "string" ? (rec.type as string) : ""; + const sessionId = + typeof rec.session_id === "string" ? (rec.session_id as string) : null; + + if (type === "system" && rec.subtype === "init") { + return { + ...empty, + sessionId, + event: { + kind: "system_init", + label: "init", + text: `session ${sessionId ?? "?"} 準備完了`, + }, + }; + } + + if (type === "assistant") { + const text = extractAssistantText(rec.message); + if (text) { + return { + ...empty, + sessionId, + event: { kind: "assistant_text", label: "Claude", text }, + }; + } + const tool = extractToolUseSummary(rec.message); + if (tool) { + return { + ...empty, + sessionId, + event: { kind: "tool_use", label: tool.label, text: tool.text }, + }; + } + return empty; + } + + if (type === "user") { + const text = extractToolResultText(rec.message); + if (text) { + return { + ...empty, + sessionId, + event: { + kind: "tool_result", + label: "tool result", + text: truncate(text, 400), + }, + }; + } + return empty; + } + + if (type === "result") { + const resultText = + typeof rec.result === "string" ? (rec.result as string) : null; + const costUsd = + typeof rec.total_cost_usd === "number" + ? (rec.total_cost_usd as number) + : null; + const numTurns = + typeof rec.num_turns === "number" ? (rec.num_turns as number) : null; + return { + sessionId, + resultText, + costUsd, + numTurns, + event: { + kind: "result", + label: "result", + text: resultText ?? "(空の結果)", + }, + }; + } + + if ( + type === "error" || + (typeof rec.subtype === "string" && rec.subtype === "error") + ) { + const message = + typeof rec.error === "string" + ? 
/**
 * Collect the visible text of an assistant message.
 *
 * Every `content` part of type `"text"` with a string `text` is concatenated
 * with no separator, and the result is trimmed.
 *
 * @param message - the `message` field of an `assistant` stream record.
 * @returns the combined text, or `null` when the message has no content
 *   array or the text is empty after trimming.
 */
function extractAssistantText(message: unknown): string | null {
	if (message === null || typeof message !== "object") return null;
	const { content } = message as { content?: unknown };
	if (!Array.isArray(content)) return null;

	const combined = content
		.map((block: unknown): string => {
			if (typeof block !== "object" || block === null) return "";
			const { type, text } = block as Record<string, unknown>;
			return type === "text" && typeof text === "string" ? text : "";
		})
		.join("")
		.trim();

	return combined.length > 0 ? combined : null;
}
/**
 * Collect the textual payload of every `tool_result` part in a user message.
 *
 * A `tool_result` whose `content` is a string contributes that string. One
 * whose `content` is an array contributes each nested `{ type: "text" }`
 * part's `text`. Pieces are joined with "\n" and the result is trimmed.
 *
 * @param message - the `message` field of a `user` stream record.
 * @returns the combined text, or `null` when there is no content array or
 *   nothing textual was found.
 */
function extractToolResultText(message: unknown): string | null {
	if (message === null || typeof message !== "object") return null;
	const { content } = message as { content?: unknown };
	if (!Array.isArray(content)) return null;

	const isRecord = (value: unknown): value is Record<string, unknown> =>
		typeof value === "object" && value !== null;

	const collected: string[] = [];
	for (const block of content as unknown[]) {
		if (!isRecord(block) || block.type !== "tool_result") continue;
		const payload = block.content;
		if (typeof payload === "string") {
			collected.push(payload);
			continue;
		}
		if (!Array.isArray(payload)) continue;
		for (const piece of payload as unknown[]) {
			if (
				isRecord(piece) &&
				piece.type === "text" &&
				typeof piece.text === "string"
			) {
				collected.push(piece.text);
			}
		}
	}

	const merged = collected.join("\n").trim();
	return merged.length === 0 ? null : merged;
}
/**
 * Shorten `text` to at most `cap` UTF-16 code units, appending "…" when
 * anything was cut.
 *
 * If the cut would fall between the two halves of a surrogate pair, the
 * dangling high surrogate is dropped as well, so the result never ends in a
 * lone surrogate (which turns into U+FFFD once encoded as UTF-8).
 *
 * @param text - string to shorten.
 * @param cap - maximum number of code units kept from `text`.
 * @returns `text` unchanged when it already fits, otherwise the shortened
 *   prefix followed by "…".
 */
function truncate(text: string, cap: number): string {
	if (text.length <= cap) return text;
	let end = cap;
	if (end > 0) {
		const last = text.charCodeAt(end - 1);
		// 0xD800–0xDBFF is the high-surrogate range; the matching low
		// surrogate lies beyond the cut.
		if (last >= 0xd800 && last <= 0xdbff) end -= 1;
	}
	return `${text.slice(0, end)}…`;
}
/**
 * Derive a short identifier from `input`: the first 8 hexadecimal characters
 * of its SHA-1 digest.
 *
 * @param input - text to hash.
 * @returns an 8-character lowercase hex string.
 */
export function __hashId(input: string): string {
	const hasher = createHash("sha1");
	hasher.update(input);
	const hex = hasher.digest("hex");
	return hex.substring(0, 8);
}
No more + // half-written PENDING rows left behind if the process + // crashes between insert and update. + const sessionId = randomUUID(); + const supervisor = getTodoSupervisor(); + const artifactPath = supervisor.computeArtifactPath({ + sessionId, + workspaceId: input.workspaceId, + }); + + const session = store.insert({ + id: sessionId, + projectId: input.projectId ?? null, + workspaceId: input.workspaceId, + title: input.title, + description: input.description, + goal: input.goal ?? null, + verifyCommand: input.verifyCommand ?? null, + maxIterations: input.maxIterations, + maxWallClockSec: input.maxWallClockSec, + status: "queued", + phase: "queued", + iteration: 0, + attachedPaneId: null, + attachedTabId: null, + claudeSessionId: null, + finalAssistantText: null, + totalCostUsd: null, + totalNumTurns: null, + pendingIntervention: null, + startHeadSha: null, + customSystemPrompt: input.customSystemPrompt ?? null, + verdictPassed: null, + verdictReason: null, + verdictFailingTest: null, + artifactPath, + startedAt: null, + completedAt: null, + }); + + // Materialize the directory + goal.md. If this throws after + // the row exists the user can delete the broken session + // from the Manager — same as any other filesystem error. + supervisor.prepareArtifacts(session); + + return { sessionId: session.id }; + }), + + list: publicProcedure + .input(z.object({ workspaceId: z.string().min(1) })) + .query(({ input }) => + getTodoSessionStore().listForWorkspace(input.workspaceId), + ), + + // Cross-workspace feed used by the Agent-Manager-style view. 
+ listAll: publicProcedure.query(() => getTodoSessionStore().listAll()), + + enhanceText: publicProcedure + .input(todoEnhanceTextInputSchema) + .mutation(async ({ input }) => { + const { text, attempts } = await enhanceTodoText( + input.text, + input.kind, + ); + if (text === null) { + throw new TRPCError({ + code: "INTERNAL_SERVER_ERROR", + message: describeEnhanceFailure(attempts), + }); + } + return { text }; + }), + + get: publicProcedure + .input(z.object({ sessionId: z.string().min(1) })) + .query(({ input }) => getTodoSessionStore().get(input.sessionId)), + + /** + * Kick off the headless claude loop for a queued session. There + * is no pane to attach anymore — the supervisor spawns claude as + * a plain child process in the main process and the Manager + * renders the parsed stream events inline. + */ + start: publicProcedure + .input(z.object({ sessionId: z.string().min(1) })) + .mutation(async ({ input }) => { + const store = getTodoSessionStore(); + const session = store.get(input.sessionId); + if (!session) { + throw new TRPCError({ + code: "NOT_FOUND", + message: "セッションが見つかりません", + }); + } + if ( + session.status !== "queued" && + session.status !== "failed" && + session.status !== "aborted" && + session.status !== "escalated" + ) { + throw new TRPCError({ + code: "PRECONDITION_FAILED", + message: `このセッションは既に ${session.status} 状態なので開始できません`, + }); + } + store.update(input.sessionId, { + status: "preparing", + phase: "preparing", + }); + // Fire-and-forget: the supervisor drives the rest of the loop. 
+ void getTodoSupervisor().start(input.sessionId); + return { ok: true }; + }), + + abort: publicProcedure + .input(z.object({ sessionId: z.string().min(1) })) + .mutation(({ input }) => { + getTodoSupervisor().abort(input.sessionId); + return { ok: true }; + }), + + updateTitle: publicProcedure + .input( + z.object({ + sessionId: z.string().min(1), + title: z.string().trim().min(1).max(200), + }), + ) + .mutation(({ input }) => { + const store = getTodoSessionStore(); + const session = store.get(input.sessionId); + if (!session) { + throw new TRPCError({ + code: "NOT_FOUND", + message: "セッションが見つかりません", + }); + } + store.update(input.sessionId, { title: input.title }); + return { ok: true }; + }), + + delete: publicProcedure + .input(z.object({ sessionId: z.string().min(1) })) + .mutation(({ input }) => { + const store = getTodoSessionStore(); + const session = store.get(input.sessionId); + if (!session) { + throw new TRPCError({ + code: "NOT_FOUND", + message: "セッションが見つかりません", + }); + } + // Best-effort: make sure the supervisor is not still driving + // the session before we wipe its row. abort() is a no-op if + // the session is not currently active. + try { + getTodoSupervisor().abort(input.sessionId); + } catch (error) { + console.warn("[todo-agent] abort-before-delete failed", error); + } + + const removed = store.remove(input.sessionId); + if (!removed) { + throw new TRPCError({ + code: "INTERNAL_SERVER_ERROR", + message: "セッションの削除に失敗しました", + }); + } + + // Best-effort artifact cleanup. Failure to remove the + // directory should not fail the mutation — the DB row is + // already gone and the directory is just scratch data. 
+ try { + const worktreePath = resolveWorktreePath(session.workspaceId); + if (worktreePath) { + const dir = path.join( + worktreePath, + TODO_ARTIFACT_SUBDIR, + session.id, + ); + rmSync(dir, { recursive: true, force: true }); + } + } catch (error) { + console.warn("[todo-agent] artifact cleanup failed", error); + } + + return { ok: true }; + }), + + rerun: publicProcedure + .input(z.object({ sessionId: z.string().min(1) })) + .mutation(({ input }) => { + const store = getTodoSessionStore(); + const source = store.get(input.sessionId); + if (!source) { + throw new TRPCError({ + code: "NOT_FOUND", + message: "元セッションが見つかりません", + }); + } + + // Create a brand-new queued session that copies the user- + // authored fields from the source. Verdict / iteration / + // pane attachment are reset so the new session starts + // clean in the Agent Manager. + const nextId = randomUUID(); + const supervisor = getTodoSupervisor(); + const artifactPath = supervisor.computeArtifactPath({ + sessionId: nextId, + workspaceId: source.workspaceId, + }); + + const next = store.insert({ + id: nextId, + projectId: source.projectId, + workspaceId: source.workspaceId, + title: source.title, + description: source.description, + goal: source.goal, + verifyCommand: source.verifyCommand, + maxIterations: source.maxIterations, + maxWallClockSec: source.maxWallClockSec, + status: "queued", + phase: "queued", + iteration: 0, + attachedPaneId: null, + attachedTabId: null, + claudeSessionId: null, + finalAssistantText: null, + totalCostUsd: null, + totalNumTurns: null, + pendingIntervention: null, + startHeadSha: null, + customSystemPrompt: source.customSystemPrompt, + verdictPassed: null, + verdictReason: null, + verdictFailingTest: null, + artifactPath, + startedAt: null, + completedAt: null, + }); + + supervisor.prepareArtifacts(next); + + return { sessionId: next.id }; + }), + + /** + * Queue a user intervention for the next turn. 
Headless mode + * cannot inject text mid-stream, so interventions land at the + * next iteration boundary. + */ + sendInput: publicProcedure + .input(todoSendInputSchema) + .mutation(({ input }) => { + getTodoSupervisor().queueIntervention(input.sessionId, input.data); + return { ok: true }; + }), + + /** + * Snapshot of the in-memory stream events buffer for a session. + * Used by the Manager to paint the initial state of the detail + * pane before the subscription takes over. + */ + getStream: publicProcedure + .input(z.object({ sessionId: z.string().min(1) })) + .query(({ input }) => + getTodoSessionStore().getStreamEvents(input.sessionId), + ), + + /** + * Live stream events (assistant text, tool calls, verify results, + * errors) for the selected session. Emits the in-memory tail on + * subscribe then fans out every subsequent append. + */ + /** + * Per-session git snapshot: branch, current vs session-start HEAD, + * commits produced since the session started, working-tree files. + * The right-sidebar in the Manager polls this every few seconds + * while the session is live. + */ + gitSnapshot: publicProcedure + .input(z.object({ sessionId: z.string().min(1) })) + .query(async ({ input }) => { + const session = getTodoSessionStore().get(input.sessionId); + if (!session) { + throw new TRPCError({ + code: "NOT_FOUND", + message: "セッションが見つかりません", + }); + } + const worktreePath = resolveWorktreePath(session.workspaceId); + if (!worktreePath) { + throw new TRPCError({ + code: "PRECONDITION_FAILED", + message: "ワークスペースのパスを解決できませんでした", + }); + } + return getSessionGitSnapshot({ + cwd: worktreePath, + startHeadSha: session.startHeadSha ?? null, + }); + }), + + /** + * Unified diff for a single file at a user-selected scope + * (session-range / staged / unstaged / a specific commit). 
+ */ + gitFileDiff: publicProcedure + .input( + z.object({ + sessionId: z.string().min(1), + path: z.string().min(1), + scope: z.enum(["session", "staged", "unstaged", "commit"]), + commitSha: z.string().optional(), + }), + ) + .query(async ({ input }) => { + const session = getTodoSessionStore().get(input.sessionId); + if (!session) { + throw new TRPCError({ + code: "NOT_FOUND", + message: "セッションが見つかりません", + }); + } + const worktreePath = resolveWorktreePath(session.workspaceId); + if (!worktreePath) return ""; + const diff = await getSessionFileDiff({ + cwd: worktreePath, + startHeadSha: session.startHeadSha ?? null, + path: input.path, + scope: input.scope as SessionDiffScope, + commitSha: input.commitSha, + }); + return diff; + }), + + subscribeStream: publicProcedure + .input(z.object({ sessionId: z.string().min(1) })) + .subscription(({ input }) => { + return observable((emit) => { + const store = getTodoSessionStore(); + const initial = store.getStreamEvents(input.sessionId); + if (initial.length > 0) { + emit.next({ + sessionId: input.sessionId, + events: initial, + }); + } + const unsubscribe = store.subscribeStream(input.sessionId, (update) => + emit.next(update), + ); + return () => unsubscribe(); + }); + }), + + subscribeState: publicProcedure + .input(z.object({ sessionId: z.string().min(1) })) + .subscription(({ input }) => { + return observable((emit) => { + const store = getTodoSessionStore(); + // Emit current state immediately on subscribe. + const current = store.get(input.sessionId); + if (current) { + emit.next({ sessionId: current.id, session: current }); + } + const unsubscribe = store.subscribe(input.sessionId, (event) => { + emit.next(event); + }); + return () => unsubscribe(); + }); + }), + + /** + * CRUD for reusable system-prompt templates the user attaches + * to new TODO sessions. Managed from the Agent Manager's + * Settings panel. 
+ */ + presets: router({ + list: publicProcedure.query(() => + localDb + .select() + .from(todoPromptPresets) + .orderBy(desc(todoPromptPresets.updatedAt)) + .all(), + ), + create: publicProcedure + .input(todoPresetCreateInputSchema) + .mutation(({ input }) => { + const now = Date.now(); + const row = localDb + .insert(todoPromptPresets) + .values({ + name: input.name, + content: input.content, + createdAt: now, + updatedAt: now, + }) + .returning() + .get(); + return row; + }), + update: publicProcedure + .input(todoPresetUpdateInputSchema) + .mutation(({ input }) => { + const row = localDb + .update(todoPromptPresets) + .set({ + name: input.name, + content: input.content, + updatedAt: Date.now(), + }) + .where(eq(todoPromptPresets.id, input.id)) + .returning() + .get(); + if (!row) { + throw new TRPCError({ + code: "NOT_FOUND", + message: "プリセットが見つかりません", + }); + } + return row; + }), + delete: publicProcedure + .input(z.object({ id: z.string().min(1) })) + .mutation(({ input }) => { + const result = localDb + .delete(todoPromptPresets) + .where(eq(todoPromptPresets.id, input.id)) + .run(); + return { ok: result.changes > 0 }; + }), + }), + }); +}; + +export type TodoAgentRouter = ReturnType; diff --git a/apps/desktop/src/main/todo-agent/types.ts b/apps/desktop/src/main/todo-agent/types.ts new file mode 100644 index 00000000000..12eda02811f --- /dev/null +++ b/apps/desktop/src/main/todo-agent/types.ts @@ -0,0 +1,155 @@ +import type { SelectTodoSession } from "@superset/local-db"; +import { z } from "zod"; + +/** + * Row shape returned by the cross-workspace `todoAgent.listAll` query: + * the session fields + the joined workspace / project names so the + * Agent-Manager view can group and label rows without N+1 queries. 
+ */
+export interface TodoSessionListEntry extends SelectTodoSession {
+	workspaceName: string | null;
+	workspaceBranch: string | null;
+	projectName: string | null;
+}
+
+/**
+ * Optional free-text field: trimmed, capped at `max` characters, and
+ * normalised so that a missing value and a blank value both come out
+ * as `undefined`.
+ */
+const optionalTrimmedText = (max: number) =>
+	z
+		.string()
+		.trim()
+		.max(max)
+		.optional()
+		.transform((v) => (v && v.length > 0 ? v : undefined));
+
+/** Input accepted by `todoAgent.create`. */
+export const todoCreateInputSchema = z.object({
+	workspaceId: z.string().min(1),
+	projectId: z.string().optional(),
+	title: z.string().min(1).max(200),
+	description: z.string().min(1).max(10_000),
+	// Optional: when omitted, the session treats completion of the
+	// requested work (the description field) as the implicit goal.
+	goal: optionalTrimmedText(10_000),
+	// Optional: when omitted, the session runs as a single-turn task
+	// (research / investigation / one-shot). When provided, it is the
+	// decisive gate for the iteration loop.
+	verifyCommand: optionalTrimmedText(10_000),
+	maxIterations: z.number().int().min(1).max(100).default(10),
+	// Wall-clock budget in seconds: 1 minute to 4 hours, default 30 minutes.
+	maxWallClockSec: z
+		.number()
+		.int()
+		.min(60)
+		.max(60 * 60 * 4)
+		.default(1800),
+	// Optional free-form text the user attached at creation time,
+	// usually pulled from a saved preset. Passed to claude via
+	// `--append-system-prompt` so the session steering stays
+	// consistent across iterations without having to repeat it in
+	// every turn's prompt.
+	customSystemPrompt: optionalTrimmedText(20_000),
+});
+
+export const todoPresetCreateInputSchema = z.object({
+	name: z.string().trim().min(1).max(120),
+	content: z.string().trim().min(1).max(20_000),
+});
+
+export const todoPresetUpdateInputSchema = z.object({
+	id: z.string().min(1),
+	name: z.string().trim().min(1).max(120),
+	content: z.string().trim().min(1).max(20_000),
+});
+
+export const todoEnhanceTextInputSchema = z.object({
+	text: z.string().trim().min(1).max(10_000),
+	kind: z.enum(["description", "goal"]),
+});
+
+export type TodoEnhanceTextInput = z.infer<typeof todoEnhanceTextInputSchema>;
+
+export type TodoCreateInput = z.infer<typeof todoCreateInputSchema>;
+
+export const todoAttachPaneInputSchema = z.object({
+	sessionId: z.string().min(1),
+	tabId: z.string().min(1),
+	paneId: z.string().min(1),
+});
+
+export type TodoAttachPaneInput = z.infer<typeof todoAttachPaneInputSchema>;
+
+export const todoSendInputSchema = z.object({
+	sessionId: z.string().min(1),
+	data: z.string().min(1),
+});
+
+export type TodoSendInput = z.infer<typeof todoSendInputSchema>;
+
+/**
+ * Event published on state changes so the tRPC subscription can fan out to
+ * the renderer. Kept small and serializable.
+ */
+export interface TodoSessionStateEvent {
+	sessionId: string;
+	session: SelectTodoSession;
+}
+
+export type TodoSessionPhase =
+	| "queued"
+	| "preparing"
+	| "running"
+	| "verifying"
+	| "done"
+	| "failed"
+	| "escalated"
+	| "aborted"
+	| "paused";
+
+export const TODO_ARTIFACT_SUBDIR = ".superset/todo";
+
+// ---- Headless stream-json events ----
+//
+// These types describe the NDJSON messages Claude Code emits on stdout when
+// invoked with `-p --output-format stream-json`. We do not attempt to cover
+// the full schema; we only name the shapes the TODO supervisor needs to
+// reason about. Unknown event types fall through as the base `raw` variant.
+// See: https://code.claude.com/docs/en/headless
+
+export type TodoStreamEventKind =
+	| "system_init"
+	| "assistant_text"
+	| "tool_use"
+	| "tool_result"
+	| "result"
+	| "error"
+	| "raw";
+
+/**
+ * One condensed event we store in the per-session in-memory buffer and send
+ * over the subscription. Raw NDJSON is kept for the `raw` variant so the UI
+ * can always show unparsed context for debugging.
+ */
+export interface TodoStreamEvent {
+	/** Stable id so React can key on it without re-rendering siblings. */
+	id: string;
+	/** Millisecond timestamp when the event was observed by the supervisor. */
+	ts: number;
+	/** Turn number this event belongs to (1-based, bumped on each iteration). */
+	iteration: number;
+	kind: TodoStreamEventKind;
+	/** One-line label used by the renderer (e.g. "User", "Claude", "Bash"). */
+	label: string;
+	/** Human-readable body text, already stripped of ANSI. */
+	text: string;
+	/** Optional raw payload for the "raw" / debug kind. */
+	raw?: unknown;
+}
+
+export interface TodoStreamUpdate {
+	sessionId: string;
+	events: TodoStreamEvent[];
+}
diff --git a/apps/desktop/src/renderer/features/todo-agent/TodoButton/TodoButton.tsx b/apps/desktop/src/renderer/features/todo-agent/TodoButton/TodoButton.tsx
new file mode 100644
index 00000000000..c2fce08b099
--- /dev/null
+++ b/apps/desktop/src/renderer/features/todo-agent/TodoButton/TodoButton.tsx
@@ -0,0 +1,88 @@
+import { Button } from "@superset/ui/button";
+import { cn } from "@superset/ui/utils";
+import { memo, useCallback, useState } from "react";
+import { HiMiniListBullet } from "react-icons/hi2";
+import { electronTrpc } from "renderer/lib/electron-trpc";
+import { TodoManager } from "../TodoManager";
+import { TodoModal } from "../TodoModal";
+
+interface TodoButtonProps {
+	projectId?: string | null;
+	workspaceId: string;
+	worktreePath?: string | null;
+}
+
+/**
+ * Entry point for the fork-local TODO autonomous agent feature.
Sits + * immediately left of the WorkspaceRunButton in PresetsBar. + * + * Clicking the button opens the Agent-Manager-style TodoManager drawer. + * Session creation lives inside the manager so users always see the + * context of what already exists before creating something new. + */ +export const TodoButton = memo(function TodoButton({ + projectId, + workspaceId, +}: TodoButtonProps) { + const [managerOpen, setManagerOpen] = useState(false); + const [modalOpen, setModalOpen] = useState(false); + + const { data: sessions } = electronTrpc.todoAgent.list.useQuery( + { workspaceId }, + { enabled: !!workspaceId, refetchInterval: 3000 }, + ); + + const activeCount = (sessions ?? []).filter( + (session) => + session.status === "queued" || + session.status === "preparing" || + session.status === "running" || + session.status === "verifying", + ).length; + + const handleRequestNewTodo = useCallback(() => { + setModalOpen(true); + }, []); + + return ( + <> + + + {/* + Rendered as a sibling of TodoManager rather than inside it so + the two shadcn Dialogs stack independently. The modal opens + on top of the Manager without the outer Dialog's + click-outside handlers interfering. 
+ */} + + + ); +}); diff --git a/apps/desktop/src/renderer/features/todo-agent/TodoButton/index.ts b/apps/desktop/src/renderer/features/todo-agent/TodoButton/index.ts new file mode 100644 index 00000000000..8a8676c99f1 --- /dev/null +++ b/apps/desktop/src/renderer/features/todo-agent/TodoButton/index.ts @@ -0,0 +1 @@ +export { TodoButton } from "./TodoButton"; diff --git a/apps/desktop/src/renderer/features/todo-agent/TodoManager/ChangesSidebar/ChangesSidebar.tsx b/apps/desktop/src/renderer/features/todo-agent/TodoManager/ChangesSidebar/ChangesSidebar.tsx new file mode 100644 index 00000000000..38ac9a5099d --- /dev/null +++ b/apps/desktop/src/renderer/features/todo-agent/TodoManager/ChangesSidebar/ChangesSidebar.tsx @@ -0,0 +1,396 @@ +import { ScrollArea } from "@superset/ui/scroll-area"; +import { cn } from "@superset/ui/utils"; +import { useMemo, useState } from "react"; +import { + HiMiniArrowPath, + HiMiniChevronDown, + HiMiniChevronRight, +} from "react-icons/hi2"; +import { electronTrpc } from "renderer/lib/electron-trpc"; + +interface ChangesSidebarProps { + sessionId: string; + active: boolean; +} + +type DiffScope = "session" | "staged" | "unstaged" | "commit"; + +interface SelectedDiff { + key: string; + path: string; + scope: DiffScope; + commitSha?: string; + label: string; +} + +/** + * Right-side panel inside the TODO Agent Manager that surfaces the git + * work the worker produced in a session. Relies on the per-session + * `startHeadSha` the supervisor captures at run start to scope commits + * to "this session only" via `git log startHeadSha..HEAD`. + */ +export function ChangesSidebar({ sessionId, active }: ChangesSidebarProps) { + const [selected, setSelected] = useState(null); + const [commitsOpen, setCommitsOpen] = useState(true); + const [workingTreeOpen, setWorkingTreeOpen] = useState(true); + + const snapshot = electronTrpc.todoAgent.gitSnapshot.useQuery( + { sessionId }, + { + refetchInterval: active ? 
3000 : false, + staleTime: 1000, + }, + ); + + const diffQuery = electronTrpc.todoAgent.gitFileDiff.useQuery( + selected + ? { + sessionId, + path: selected.path, + scope: selected.scope, + commitSha: selected.commitSha, + } + : { sessionId, path: "", scope: "session" as const }, + { enabled: !!selected, staleTime: 5_000 }, + ); + + const utils = electronTrpc.useUtils(); + const handleRefresh = () => { + void utils.todoAgent.gitSnapshot.invalidate({ sessionId }); + if (selected) { + void utils.todoAgent.gitFileDiff.invalidate({ + sessionId, + path: selected.path, + scope: selected.scope, + commitSha: selected.commitSha, + }); + } + }; + + const data = snapshot.data; + const commits = data?.commits ?? []; + const workingTree = data?.workingTree ?? []; + + const stagedCount = useMemo( + () => workingTree.filter((f) => f.stage === "staged").length, + [workingTree], + ); + const unstagedCount = useMemo( + () => workingTree.filter((f) => f.stage === "unstaged").length, + [workingTree], + ); + const untrackedCount = useMemo( + () => workingTree.filter((f) => f.stage === "untracked").length, + [workingTree], + ); + + return ( +
+
+
+
+ 変更 +
+
+ {data?.branch ? ( + {data.branch} + ) : ( + (ブランチ取得中…) + )} +
+
+ +
+ + +
+ {data?.startHeadSha && ( +
+
+ 開始時 HEAD +
+
+ {data.startHeadSha.slice(0, 12)} + {data.currentHeadSha && + data.currentHeadSha !== data.startHeadSha ? ( + <> + {" → "} + + {data.currentHeadSha.slice(0, 12)} + + + ) : null} +
+ {(data.ahead > 0 || data.behind > 0) && ( +
+ ↑ {data.ahead} · ↓ {data.behind} +
+ )} +
+ )} + + {!data?.startHeadSha && snapshot.isSuccess && ( +
+ 開始時 HEAD が記録されていません。Start して最初のターンに入ると + このパネルに差分とコミット履歴が表示されます。 +
+ )} + + {/* Commits since session start */} +
+ + {commitsOpen && ( +
+ {commits.length === 0 ? ( +

+ このセッションでの新規コミットはありません。 +

+ ) : ( + commits.map((commit) => ( + + )) + )} +
+ )} +
+ + {/* Working tree */} +
+ + {workingTreeOpen && ( +
+ {workingTree.length === 0 ? ( +

+ ワーキングツリーは clean です。 +

+ ) : ( + workingTree.map((file) => { + const key = `wt:${file.stage}:${file.path}`; + const scope: DiffScope = + file.stage === "staged" ? "staged" : "unstaged"; + const canDiff = + file.stage !== "untracked" && file.code !== "D"; + return ( + + ); + }) + )} +
+ )} +
+ + {/* Diff viewer for the currently selected file/commit */} + {selected && ( +
+
+
+ {selected.scope === "commit" + ? `コミット ${selected.label}` + : `${selected.scope === "staged" ? "staged" : "unstaged"} · ${selected.label}`} +
+ +
+ +
+ )} +
+
+
+ ); +} + +function StatusBadge({ code, stage }: { code: string; stage: string }) { + const { letter, color } = useMemo(() => { + if (stage === "untracked") { + return { letter: "?", color: "text-muted-foreground" }; + } + switch (code) { + case "M": + return { letter: "M", color: "text-amber-500" }; + case "A": + return { letter: "A", color: "text-emerald-500" }; + case "D": + return { letter: "D", color: "text-rose-500" }; + case "R": + return { letter: "R", color: "text-primary" }; + default: + return { letter: code || "·", color: "text-muted-foreground" }; + } + }, [code, stage]); + return ( + + {letter} + + ); +} + +function DiffBlock({ + content, + loading, +}: { + content: string; + loading: boolean; +}) { + if (loading && !content) { + return ( +
読み込み中…
+ ); + } + if (!content.trim()) { + return ( +
+ 差分はありません。 +
+ ); + } + const lines = content.split("\n"); + return ( +
+			
+				{lines.map((line, idx) => (
+					
+ {line || " "} +
+ ))} +
+
+ ); +} + +function formatShortDate(iso: string): string { + if (!iso) return ""; + const d = new Date(iso); + if (Number.isNaN(d.getTime())) return iso; + const pad = (n: number) => n.toString().padStart(2, "0"); + return `${pad(d.getMonth() + 1)}/${pad(d.getDate())} ${pad(d.getHours())}:${pad(d.getMinutes())}`; +} diff --git a/apps/desktop/src/renderer/features/todo-agent/TodoManager/ChangesSidebar/index.ts b/apps/desktop/src/renderer/features/todo-agent/TodoManager/ChangesSidebar/index.ts new file mode 100644 index 00000000000..047b3bbef5f --- /dev/null +++ b/apps/desktop/src/renderer/features/todo-agent/TodoManager/ChangesSidebar/index.ts @@ -0,0 +1 @@ +export { ChangesSidebar } from "./ChangesSidebar"; diff --git a/apps/desktop/src/renderer/features/todo-agent/TodoManager/PresetsDialog/PresetsDialog.tsx b/apps/desktop/src/renderer/features/todo-agent/TodoManager/PresetsDialog/PresetsDialog.tsx new file mode 100644 index 00000000000..49b4cfaa0e3 --- /dev/null +++ b/apps/desktop/src/renderer/features/todo-agent/TodoManager/PresetsDialog/PresetsDialog.tsx @@ -0,0 +1,282 @@ +import type { SelectTodoPromptPreset } from "@superset/local-db"; +import { Button } from "@superset/ui/button"; +import { Dialog, DialogContent, DialogTitle } from "@superset/ui/dialog"; +import { Input } from "@superset/ui/input"; +import { ScrollArea } from "@superset/ui/scroll-area"; +import { toast } from "@superset/ui/sonner"; +import { Textarea } from "@superset/ui/textarea"; +import { cn } from "@superset/ui/utils"; +import { useCallback, useEffect, useMemo, useState } from "react"; +import { HiMiniPlus, HiMiniTrash, HiMiniXMark } from "react-icons/hi2"; +import { electronTrpc } from "renderer/lib/electron-trpc"; + +interface PresetsDialogProps { + open: boolean; + onOpenChange: (open: boolean) => void; +} + +/** + * Manager for reusable TODO system-prompt templates. Entered from the + * "設定" row at the bottom of the Agent Manager's left sidebar. 
+ * Two-pane layout: list of presets on the left, edit form on the right. + */ +export function PresetsDialog({ open, onOpenChange }: PresetsDialogProps) { + const utils = electronTrpc.useUtils(); + const { data: presets } = electronTrpc.todoAgent.presets.list.useQuery( + undefined, + { enabled: open }, + ); + + const [selectedId, setSelectedId] = useState(null); + const [draft, setDraft] = useState<{ + id: string | null; + name: string; + content: string; + }>({ id: null, name: "", content: "" }); + const [confirmingDelete, setConfirmingDelete] = useState(false); + + const createMut = electronTrpc.todoAgent.presets.create.useMutation(); + const updateMut = electronTrpc.todoAgent.presets.update.useMutation(); + const deleteMut = electronTrpc.todoAgent.presets.delete.useMutation(); + + const invalidate = useCallback( + () => utils.todoAgent.presets.list.invalidate(), + [utils], + ); + + const selected = useMemo( + () => + (presets ?? []).find( + (p: SelectTodoPromptPreset) => p.id === selectedId, + ) ?? null, + [presets, selectedId], + ); + + // Sync draft with selection changes. 
+ useEffect(() => { + if (selected) { + setDraft({ + id: selected.id, + name: selected.name, + content: selected.content, + }); + } else { + setDraft({ id: null, name: "", content: "" }); + } + setConfirmingDelete(false); + }, [selected]); + + const dirty = + !!draft.name.trim() && + !!draft.content.trim() && + (!selected || + draft.name !== selected.name || + draft.content !== selected.content); + + const handleNew = useCallback(() => { + setSelectedId(null); + setDraft({ id: null, name: "", content: "" }); + }, []); + + const handleSave = useCallback(async () => { + try { + if (draft.id) { + const row = await updateMut.mutateAsync({ + id: draft.id, + name: draft.name.trim(), + content: draft.content.trim(), + }); + setSelectedId(row.id); + toast.success("プリセットを更新しました"); + } else { + const row = await createMut.mutateAsync({ + name: draft.name.trim(), + content: draft.content.trim(), + }); + setSelectedId(row.id); + toast.success("プリセットを作成しました"); + } + await invalidate(); + } catch (error) { + toast.error( + error instanceof Error ? error.message : "保存に失敗しました", + ); + } + }, [createMut, draft, invalidate, updateMut]); + + const handleDelete = useCallback(async () => { + if (!draft.id) return; + try { + await deleteMut.mutateAsync({ id: draft.id }); + await invalidate(); + setSelectedId(null); + setConfirmingDelete(false); + toast.success("プリセットを削除しました"); + } catch (error) { + toast.error( + error instanceof Error ? error.message : "削除に失敗しました", + ); + } + }, [deleteMut, draft.id, invalidate]); + + return ( + + + + システムプロンプトテンプレート + +
+
+ + システムプロンプトテンプレート + + + TODO に付けられる再利用プロンプト + +
+ +
+ +
+
+
+ +
+ +
+ {(presets ?? []).length === 0 && ( +

+ まだプリセットはありません。右上から新規作成してください。 +

+ )} + {(presets ?? []).map((preset: SelectTodoPromptPreset) => ( + + ))} +
+
+
+ +
+
+ + + setDraft((d) => ({ ...d, name: e.target.value })) + } + placeholder="例: 日本語で返答" + maxLength={120} + className="rounded-md" + /> +
+
+ +