diff --git a/apps/desktop/src/renderer/components/Chat/ChatInterface/components/ChatInputFooter/ChatInputFooter.tsx b/apps/desktop/src/renderer/components/Chat/ChatInterface/components/ChatInputFooter/ChatInputFooter.tsx index 1cceca08b2e..532f018ce19 100644 --- a/apps/desktop/src/renderer/components/Chat/ChatInterface/components/ChatInputFooter/ChatInputFooter.tsx +++ b/apps/desktop/src/renderer/components/Chat/ChatInterface/components/ChatInputFooter/ChatInputFooter.tsx @@ -1,3 +1,4 @@ +import { chatServiceTrpc } from "@superset/chat/client"; import { PromptInput, PromptInputAttachment, @@ -110,6 +111,34 @@ export function ChatInputFooter({ setLinkedIssues((prev) => prev.filter((issue) => issue.slug !== slug)); }, []); + const trpcUtils = chatServiceTrpc.useUtils(); + const searchFiles = useCallback( + async (query: string) => { + const results = await trpcUtils.workspace.searchFiles.fetch({ + rootPath: cwd, + query, + includeHidden: false, + limit: 20, + }); + return results.map((r) => ({ + id: r.id, + name: r.name, + relativePath: r.relativePath, + })); + }, + [trpcUtils, cwd], + ); + const previewSlashCommand = useCallback( + async (text: string) => { + const result = await trpcUtils.workspace.previewSlashCommand.fetch({ + cwd, + text, + }); + return result ?? 
null; + }, + [trpcUtils, cwd], + ); + const handleSend = useCallback( (message: PromptInputMessage) => { if (linkedIssues.length === 0) return onSend(message); @@ -177,6 +206,8 @@ export function ChatInputFooter({ /> Promise; + interface SlashCommandPreviewPopoverProps { cwd: string; + previewSlashCommand: PreviewSlashCommandFn; slashCommands: Array<{ name: string; aliases: string[]; @@ -24,6 +33,7 @@ interface SlashCommandPreviewPopoverProps { export function SlashCommandPreviewPopover({ cwd, + previewSlashCommand, slashCommands, editor, isFocused, @@ -59,15 +69,21 @@ export function SlashCommandPreviewPopover({ const parsedInput = useMemo(() => parseSlashInput(inputValue), [inputValue]); const debouncedSlashPreviewInput = useDebouncedValue(slashPreviewInput, 120); - const { data: slashPreview } = - chatServiceTrpc.workspace.previewSlashCommand.useQuery( - { cwd, text: debouncedSlashPreviewInput }, - { - enabled: debouncedSlashPreviewInput.length > 1 && !!cwd, - staleTime: 250, - placeholderData: (previous) => previous, - }, - ); + const [slashPreview, setSlashPreview] = useState(null); + useEffect(() => { + if (debouncedSlashPreviewInput.length <= 1 || !cwd) return; + let cancelled = false; + previewSlashCommand(debouncedSlashPreviewInput) + .then((result) => { + if (!cancelled) setSlashPreview(result); + }) + .catch(() => { + // Empty preview on error — popover degrades gracefully. 
+ }); + return () => { + cancelled = true; + }; + }, [cwd, debouncedSlashPreviewInput, previewSlashCommand]); const commandDefinition = useMemo(() => { if (!parsedInput?.commandName) return null; diff --git a/apps/desktop/src/renderer/components/Chat/ChatInterface/components/TiptapPromptEditor/TiptapPromptEditor.tsx b/apps/desktop/src/renderer/components/Chat/ChatInterface/components/TiptapPromptEditor/TiptapPromptEditor.tsx index e7ea31c55f5..bde280ae7df 100644 --- a/apps/desktop/src/renderer/components/Chat/ChatInterface/components/TiptapPromptEditor/TiptapPromptEditor.tsx +++ b/apps/desktop/src/renderer/components/Chat/ChatInterface/components/TiptapPromptEditor/TiptapPromptEditor.tsx @@ -1,4 +1,3 @@ -import { chatServiceTrpc } from "@superset/chat/client"; import { usePromptInputAttachments, usePromptInputController, @@ -41,10 +40,14 @@ import { SlashCommandMenu } from "../SlashCommandMenu"; import { FileMentionNode } from "./FileMentionNode"; import { parseTextToEditorContent } from "./parseTextToEditorContent"; import { SlashCommandNode } from "./SlashCommandNode"; -import { SlashCommandPreviewPopover } from "./SlashCommandPreviewPopover"; +import { + type PreviewSlashCommandFn, + SlashCommandPreviewPopover, +} from "./SlashCommandPreviewPopover"; import { serializeEditorToText } from "./serializeEditorToText"; type FileResult = { id: string; name: string; relativePath: string }; +type SearchFilesFn = (query: string) => Promise; type SlashMenuState = { commands: SlashCommand[]; @@ -61,6 +64,8 @@ type MentionState = { export interface TiptapPromptEditorProps { cwd: string; + searchFiles: SearchFilesFn; + previewSlashCommand?: PreviewSlashCommandFn; slashCommands: SlashCommand[]; availableModels?: ModelOption[]; placeholder?: string; @@ -76,6 +81,8 @@ function getDirectoryPath(relativePath: string): string { export function TiptapPromptEditor({ cwd, + searchFiles, + previewSlashCommand, slashCommands, availableModels, placeholder = "Ask to make changes, 
@mention files, run /commands", @@ -139,28 +146,25 @@ export function TiptapPromptEditor({ mentionState?.query ?? "", 120, ); - const { data: fileResults } = chatServiceTrpc.workspace.searchFiles.useQuery( - { - rootPath: cwd, - query: debouncedMentionQuery, - includeHidden: false, - limit: 20, - }, - { - enabled: - !!mentionState && - !!cwd && - debouncedMentionQuery.length > 0 && - (mentionState?.query?.length ?? 0) > 0, - staleTime: 1000, - placeholderData: (prev) => prev ?? [], - }, - ); + const isMentionVisible = + mentionState !== null && (mentionState?.query?.length ?? 0) > 0; + const [fileResults, setFileResults] = useState([]); + useEffect(() => { + if (!isMentionVisible || !cwd || debouncedMentionQuery.length === 0) return; + let cancelled = false; + searchFiles(debouncedMentionQuery) + .then((results) => { + if (!cancelled) setFileResults(results); + }) + .catch(() => { + // Empty results on error — mention popup degrades gracefully. + }); + return () => { + cancelled = true; + }; + }, [debouncedMentionQuery, cwd, isMentionVisible, searchFiles]); - const mentionFiles: FileResult[] = - mentionState && (mentionState.query?.length ?? 0) > 0 - ? (fileResults ?? []) - : []; + const mentionFiles: FileResult[] = isMentionVisible ? fileResults : []; const mentionFilesRef = useRef(mentionFiles); mentionFilesRef.current = mentionFiles; @@ -634,10 +638,13 @@ export function TiptapPromptEditor({ return ( <> - {/* Slash command params popover — anchored to the chip node */} - {editor && ( + {/* Slash command params popover — anchored to the chip node. + Only rendered when the parent provides a previewSlashCommand + function; v2 ChatPane uses its own SlashCommandPreview instead. */} + {editor && previewSlashCommand && ( ["chat"]["getSlashCommands"] + >, + ) => + commands.map((command) => ({ + ...command, + kind: + command.kind === "builtin" + ? ("builtin" as const) + : ("custom" as const), + source: + command.kind === "builtin" + ? 
("builtin" as const) + : ("project" as const), + })), + [], + ); + const { data: slashCommands = [] } = workspaceTrpc.chat.getSlashCommands.useQuery( - { sessionId: sessionId ?? "", workspaceId }, - { - enabled: Boolean(sessionId), - select: (commands) => - commands.map((command) => ({ - ...command, - kind: - command.kind === "builtin" - ? ("builtin" as const) - : ("custom" as const), - source: - command.kind === "builtin" - ? ("builtin" as const) - : ("project" as const), - })), - }, + { workspaceId }, + { select: selectSlashCommands }, ); const chat = useChatDisplay({ sessionId, workspaceId, enabled: Boolean(sessionId), - fps: 60, }); const { commands, @@ -331,38 +341,20 @@ export function ChatPaneInterface({ const sendMessageToSession = useCallback( async (targetSessionId: string, input: ChatSendMessageInput) => { - const queryInput = { + // Optimistic state for this path lives in `pendingUserTurn` (set by + // the caller in handleSend), NOT in the snapshot cache. Writing to + // the cache here was racing with the 4fps snapshot polls — a poll + // could resolve mid-mutation with the harness's pre-message state + // and clobber the optimistic write, making the user message vanish + // briefly. The pendingUserTurn local state is merged in via + // getVisibleMessagesWithPendingUserTurn so it survives stale polls. 
+ await sendMessageMutation.mutateAsync({ sessionId: targetSessionId, workspaceId, - }; - const optimisticMessage = toOptimisticUserMessage(input); - if (optimisticMessage) { - workspaceTrpcUtils.chat.listMessages.setData( - queryInput, - (existingMessages = []) => [...existingMessages, optimisticMessage], - ); - } - - try { - await sendMessageMutation.mutateAsync({ - sessionId: targetSessionId, - workspaceId, - ...input, - }); - } catch (error) { - if (optimisticMessage) { - workspaceTrpcUtils.chat.listMessages.setData( - queryInput, - (existingMessages = []) => - existingMessages.filter( - (message) => message.id !== optimisticMessage.id, - ), - ); - } - throw error; - } + ...input, + }); }, - [workspaceTrpcUtils.chat.listMessages, sendMessageMutation, workspaceId], + [sendMessageMutation, workspaceId], ); const canAbort = Boolean(isRunning); @@ -600,7 +592,25 @@ export function ChatPaneInterface({ if (sessionId && targetSessionId === sessionId) { await commands.sendMessage(sendInput); } else { - await sendMessageToSession(targetSessionId, sendInput); + // New-session path: the existing-session path's optimistic + // state lives inside useChatDisplay, but we don't have a + // session subscribed there yet. Hold the user message in + // pendingUserTurn so getVisibleMessagesWithPendingUserTurn + // keeps it visible across stale snapshot polls until the + // harness's response includes it. 
+ const optimisticMessage = toOptimisticUserMessage(sendInput); + if (optimisticMessage) { + setPendingUserTurn({ + kind: "append", + message: optimisticMessage, + }); + } + try { + await sendMessageToSession(targetSessionId, sendInput); + } catch (error) { + setPendingUserTurn(null); + throw error; + } } if (content) { onUserMessageSubmitted?.(content); diff --git a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/components/ChatInputFooter/ChatInputFooter.tsx b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/components/ChatInputFooter/ChatInputFooter.tsx index d0a37e633aa..5b33af4e68e 100644 --- a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/components/ChatInputFooter/ChatInputFooter.tsx +++ b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/components/ChatInputFooter/ChatInputFooter.tsx @@ -6,6 +6,7 @@ import { usePromptInputController, } from "@superset/ui/ai-elements/prompt-input"; import type { ThinkingLevel } from "@superset/ui/ai-elements/thinking-toggle"; +import { workspaceTrpc } from "@superset/workspace-client"; import type { ChatStatus, FileUIPart } from "ai"; import type React from "react"; import type { ReactNode } from "react"; @@ -29,7 +30,6 @@ import type { LinkedIssue } from "./types"; import { getErrorMessage } from "./utils/getErrorMessage"; interface ChatInputFooterProps { - sessionId: string | null; workspaceId: string; cwd: string; isFocused: boolean; @@ -63,7 +63,6 @@ interface ChatInputFooterProps { } export function ChatInputFooter({ - sessionId, workspaceId, cwd, isFocused, @@ -115,6 +114,24 
@@ export function ChatInputFooter({ setLinkedIssues((prev) => prev.filter((issue) => issue.slug !== slug)); }, []); + const trpcUtils = workspaceTrpc.useUtils(); + const searchFiles = useCallback( + async (query: string) => { + const { matches } = await trpcUtils.filesystem.searchFiles.fetch({ + workspaceId, + query, + includeHidden: false, + limit: 20, + }); + return matches.map((m) => ({ + id: m.absolutePath, + name: m.name, + relativePath: m.relativePath, + })); + }, + [trpcUtils, workspaceId], + ); + const handleSend = useCallback( (message: PromptInputMessage) => { if (linkedIssues.length === 0) return onSend(message); @@ -187,12 +204,12 @@ export function ChatInputFooter({ onRemove={removeLinkedIssue} /> (null); useEffect(() => { - if (!sessionId || debouncedSlashPreviewInput.length <= 1) { + if (debouncedSlashPreviewInput.length <= 1) { setSlashPreview(null); return; } @@ -80,7 +78,6 @@ export function SlashCommandPreview({ let cancelled = false; void previewSlashCommand .mutateAsync({ - sessionId, workspaceId, text: debouncedSlashPreviewInput, }) @@ -107,7 +104,7 @@ export function SlashCommandPreview({ return () => { cancelled = true; }; - }, [debouncedSlashPreviewInput, previewSlashCommand, sessionId, workspaceId]); + }, [debouncedSlashPreviewInput, previewSlashCommand, workspaceId]); const commandDefinition = useMemo(() => { if (!parsedInput?.commandName) return null; diff --git a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/components/ModelPicker/ModelPicker.tsx b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/components/ModelPicker/ModelPicker.tsx index 31bea858f62..d9b03ee2a41 100644 --- 
a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/components/ModelPicker/ModelPicker.tsx +++ b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/components/ModelPicker/ModelPicker.tsx @@ -9,9 +9,10 @@ import { } from "@superset/ui/ai-elements/model-selector"; import { PromptInputButton } from "@superset/ui/ai-elements/prompt-input"; import { claudeIcon } from "@superset/ui/icons/preset-icons"; +import { workspaceTrpc } from "@superset/workspace-client"; import { useNavigate } from "@tanstack/react-router"; import { ChevronDownIcon } from "lucide-react"; -import { useMemo } from "react"; +import { useEffect, useMemo } from "react"; import { PILL_BUTTON_CLASS } from "renderer/components/Chat/ChatInterface/styles"; import type { ModelOption } from "renderer/components/Chat/ChatInterface/types"; import { ModelProviderGroup } from "./components/ModelProviderGroup"; @@ -41,6 +42,15 @@ export function ModelPicker({ const selectedLogo = selectedModel ? providerToLogo(selectedModel.provider) : null; + const { data: anthropicStatus, refetch: refetchAnthropicStatus } = + workspaceTrpc.auth.getAnthropicStatus.useQuery(); + const { data: openAIStatus, refetch: refetchOpenAIStatus } = + workspaceTrpc.auth.getOpenAIStatus.useQuery(); + + useEffect(() => { + if (!open) return; + void Promise.all([refetchAnthropicStatus(), refetchOpenAIStatus()]); + }, [open, refetchAnthropicStatus, refetchOpenAIStatus]); const openModelsSettings = () => { onOpenChange(false); @@ -71,11 +81,11 @@ export function ModelPicker({ key={provider} provider={provider} models={providerModels} - isAnthropicAuthenticated={true} + isAnthropicAuthenticated={anthropicStatus?.authenticated ?? 
false} isAnthropicOAuthPending={false} isAnthropicApiKeyPending={false} onOpenAnthropicAuthModal={openModelsSettings} - isOpenAIAuthenticated={true} + isOpenAIAuthenticated={openAIStatus?.authenticated ?? false} isOpenAIOAuthPending={false} isOpenAIApiKeyPending={false} onOpenOpenAIAuthModal={openModelsSettings} diff --git a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/hooks/useSlashCommandExecutor/useSlashCommandExecutor.ts b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/hooks/useSlashCommandExecutor/useSlashCommandExecutor.ts index a89531f1073..d2f74cec0b7 100644 --- a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/hooks/useSlashCommandExecutor/useSlashCommandExecutor.ts +++ b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/hooks/useSlashCommandExecutor/useSlashCommandExecutor.ts @@ -9,6 +9,7 @@ import { findModelByQuery, normalizeModelQueryFromActionArgument, } from "./model-query"; +import { resolveSlashPromptResult } from "./prompt-result"; interface UseSlashCommandExecutorOptions { sessionId: string | null; @@ -49,6 +50,8 @@ export function useSlashCommandExecutor({ onTrackEvent, }: UseSlashCommandExecutorOptions) { const workspaceTrpcUtils = workspaceTrpc.useUtils(); + const { mutateAsync: resolveSlashCommandMutateAsync } = + workspaceTrpc.chat.resolveSlashCommand.useMutation(); const resolveSlashCommandInput = useCallback( async (inputText: string): Promise => { @@ -57,21 +60,6 @@ export function useSlashCommandExecutor({ return { handled: false, nextText: text }; } - if (!sessionId) { - if (text === "/new" 
|| text === "/clear") { - onClearError(); - await onResetSession(); - toast.success( - text === "/clear" - ? "Context cleared in a new chat session" - : "Started a new chat session", - ); - return { handled: true, nextText: "" }; - } - - return { handled: false, nextText: text }; - } - try { const [commandNameRaw, ...rest] = text.slice(1).split(/\s+/); const commandName = commandNameRaw?.toLowerCase() ?? ""; @@ -164,8 +152,41 @@ export function useSlashCommandExecutor({ }); return { handled: true, nextText: "" }; } - default: - return { handled: false, nextText: text }; + default: { + // Custom slash command — resolve via host-service so prompts + // from .claude/commands and .agents/commands get substituted. + // Workspace-scoped: works whether or not a session exists yet. + const resolved = await resolveSlashCommandMutateAsync({ + workspaceId, + text, + }); + if (!resolved.handled) { + return { handled: false, nextText: text }; + } + const promptResolution = resolveSlashPromptResult({ + handled: resolved.handled, + prompt: resolved.prompt, + commandName: resolved.commandName, + invokedAs: resolved.invokedAs, + }); + if (promptResolution.errorMessage) { + onSetErrorMessage(promptResolution.errorMessage); + toast.error(promptResolution.errorMessage); + return { handled: true, nextText: "" }; + } + onClearError(); + if (promptResolution.handled) { + onTrackEvent?.("chat_slash_command_used", { + command_name: + resolved.invokedAs ?? resolved.commandName ?? 
commandName, + command_type: "prompt", + }); + } + return { + handled: promptResolution.handled, + nextText: promptResolution.nextText, + }; + } } } catch (error) { console.warn( @@ -189,6 +210,7 @@ export function useSlashCommandExecutor({ loadMcpOverview, onResetSession, onStopActiveResponse, + resolveSlashCommandMutateAsync, sessionId, workspaceId, workspaceTrpcUtils.chat.getMcpOverview, diff --git a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/hooks/useWorkspaceChatController/useWorkspaceChatController.ts b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/hooks/useWorkspaceChatController/useWorkspaceChatController.ts index 3df58c50391..506e30c2782 100644 --- a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/hooks/useWorkspaceChatController/useWorkspaceChatController.ts +++ b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/hooks/useWorkspaceChatController/useWorkspaceChatController.ts @@ -63,6 +63,7 @@ export function useWorkspaceChatController({ session?.session?.activeOrganizationId, ); const collections = useCollections(); + const endSessionMutation = workspaceTrpc.chat.endSession.useMutation(); const { chatSessions: chatSessionActions } = useOptimisticCollectionActions(); const { data: workspace } = workspaceTrpc.workspace.get.useQuery( @@ -100,6 +101,11 @@ export function useWorkspaceChatController({ if (!transaction && !isDesktopChatDevMode()) { throw new Error("Failed to delete chat session"); } + // Tear down the host-service in-memory runtime so it doesn't leak. + // Failures here must not block the user-visible delete. 
+ void endSessionMutation + .mutateAsync({ sessionId: sessionIdToDelete, workspaceId }) + .catch(() => {}); posthog.capture("chat_session_deleted", { workspace_id: workspaceId, @@ -112,6 +118,7 @@ export function useWorkspaceChatController({ }, [ chatSessionActions, + endSessionMutation, onSessionIdChange, organizationId, sessionId, diff --git a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/hooks/useWorkspaceChatDisplay/useWorkspaceChatDisplay.ts b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/hooks/useWorkspaceChatDisplay/useWorkspaceChatDisplay.ts index 9e3a1eb4475..8d2338ea6ea 100644 --- a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/hooks/useWorkspaceChatDisplay/useWorkspaceChatDisplay.ts +++ b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/hooks/useWorkspaceChatDisplay/useWorkspaceChatDisplay.ts @@ -20,8 +20,9 @@ type RouterInputs = inferRouterInputs; type RouterOutputs = inferRouterOutputs; type ChatInputs = RouterInputs["chat"]; type ChatOutputs = RouterOutputs["chat"]; -type DisplayStateOutput = ChatOutputs["getDisplayState"]; -type ListMessagesOutput = ChatOutputs["listMessages"]; +type SnapshotOutput = ChatOutputs["getSnapshot"]; +type DisplayStateOutput = SnapshotOutput["displayState"]; +type ListMessagesOutput = SnapshotOutput["messages"]; type HistoryMessage = ListMessagesOutput[number]; type HistoryMessagePart = HistoryMessage["content"][number]; type SendMessageInput = ChatInputs["sendMessage"]; @@ -63,7 +64,7 @@ function withoutActiveTurnAssistantHistory({ isRunning, }: { messages: ListMessagesOutput; - currentMessage: NonNullable["currentMessage"] | null; + currentMessage: DisplayStateOutput["currentMessage"] | null; isRunning: 
boolean; }): ListMessagesOutput { if (!isRunning || !currentMessage || currentMessage.role !== "assistant") { @@ -124,12 +125,7 @@ export function useChatDisplay(options: UseChatDisplayOptions) { refetchOnWindowFocus: false, } as const; - const displayQuery = workspaceTrpc.chat.getDisplayState.useQuery( - queryInput as { sessionId: string; workspaceId: string }, - queryOptions, - ); - - const messagesQuery = workspaceTrpc.chat.listMessages.useQuery( + const snapshotQuery = workspaceTrpc.chat.getSnapshot.useQuery( queryInput as { sessionId: string; workspaceId: string }, queryOptions, ); @@ -142,7 +138,8 @@ export function useChatDisplay(options: UseChatDisplayOptions) { workspaceTrpc.chat.respondToQuestion.useMutation(); const respondToPlanMutation = workspaceTrpc.chat.respondToPlan.useMutation(); - const displayState = displayQuery.data ?? null; + const snapshot = snapshotQuery.data ?? null; + const displayState = snapshot?.displayState ?? null; const runtimeErrorMessage = typeof displayState?.errorMessage === "string" && displayState.errorMessage.trim() @@ -152,9 +149,9 @@ export function useChatDisplay(options: UseChatDisplayOptions) { const isRunning = displayState?.isRunning ?? false; const isConversationLoading = isQueryEnabled && - messagesQuery.data === undefined && - (messagesQuery.isLoading || messagesQuery.isFetching); - const historicalMessages = messagesQuery.data ?? []; + snapshotQuery.data === undefined && + (snapshotQuery.isLoading || snapshotQuery.isFetching); + const historicalMessages = snapshot?.messages ?? []; const latestAssistantErrorMessage = isRunning ? null : findLatestAssistantErrorMessage(historicalMessages); @@ -359,8 +356,7 @@ export function useChatDisplay(options: UseChatDisplayOptions) { error: runtimeErrorMessage ?? latestAssistantErrorMessage ?? - displayQuery.error ?? - messagesQuery.error ?? + snapshotQuery.error ?? commandError ?? 
null, commands, diff --git a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/usePaneRegistry.tsx b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/usePaneRegistry.tsx index d3631a3baca..604f6773374 100644 --- a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/usePaneRegistry.tsx +++ b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/usePaneRegistry.tsx @@ -40,6 +40,7 @@ import { } from "../../state/fileDocumentStore"; import type { BrowserPaneData, + ChatPaneData, CommentPaneData, DevtoolsPaneData, FilePaneData, @@ -47,6 +48,7 @@ import type { TerminalPaneData, } from "../../types"; import { BrowserPane, BrowserPaneToolbar } from "./components/BrowserPane"; +import { ChatPane } from "./components/ChatPane"; import { CommentPane } from "./components/CommentPane"; import { DiffPane } from "./components/DiffPane"; import { FilePane } from "./components/FilePane"; @@ -441,13 +443,22 @@ export function usePaneRegistry( /> ), - // Disabled until ChatServiceProvider is wired above v2 panes — - // TiptapPromptEditor needs its tRPC context. - renderPane: (_ctx: RendererContext) => ( -
- Chat pane is temporarily disabled. -
- ), + renderPane: (ctx: RendererContext) => { + const data = ctx.pane.data as ChatPaneData; + return ( + + ctx.actions.updateData({ ...data, sessionId: id }) + } + initialLaunchConfig={data.launchConfig ?? null} + onConsumeLaunchConfig={() => + ctx.actions.updateData({ ...data, launchConfig: null }) + } + /> + ); + }, contextMenuActions: (_ctx, defaults) => defaults.map((d) => d.key === "close-pane" ? { ...d, label: "Close Chat" } : d, diff --git a/packages/chat/src/server/desktop/index.ts b/packages/chat/src/server/desktop/index.ts index 06df62dddd6..390d2dc1a8d 100644 --- a/packages/chat/src/server/desktop/index.ts +++ b/packages/chat/src/server/desktop/index.ts @@ -16,4 +16,9 @@ export { export { ChatService } from "./chat-service"; export type { ChatServiceRouter } from "./router"; export { createChatServiceRouter } from "./router"; +export type { SlashCommand } from "./slash-commands"; +export { + getSlashCommands, + resolveSlashCommand, +} from "./slash-commands"; export { generateTitleFromMessage } from "./title-generation"; diff --git a/packages/host-service/src/app.ts b/packages/host-service/src/app.ts index da0d1840770..26abc493a41 100644 --- a/packages/host-service/src/app.ts +++ b/packages/host-service/src/app.ts @@ -1,6 +1,7 @@ import { createNodeWebSocket } from "@hono/node-ws"; import { trpcServer } from "@hono/trpc-server"; import { Octokit } from "@octokit/rest"; +import { ChatService } from "@superset/chat/server/desktop"; import type { MiddlewareHandler } from "hono"; import { Hono } from "hono"; import { cors } from "hono/cors"; @@ -68,8 +69,13 @@ export function createApp(options: CreateAppOptions): CreateAppResult { db, runtimeResolver: providers.modelResolver, }); + // Provider auth (Anthropic / OpenAI OAuth + API keys) is per-machine, not + // per-workspace. ChatService is a long-lived singleton wrapping mastra's + // auth storage; the `host.auth.*` router proxies to it. 
+ const chatService = new ChatService(); const runtime = { + auth: chatService, chat: chatRuntime, filesystem, pullRequests: pullRequestRuntime, diff --git a/packages/host-service/src/runtime/chat/chat.ts b/packages/host-service/src/runtime/chat/chat.ts index 514fb3871c3..a0df961b362 100644 --- a/packages/host-service/src/runtime/chat/chat.ts +++ b/packages/host-service/src/runtime/chat/chat.ts @@ -1,8 +1,12 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; import { homedir } from "node:os"; import { join } from "node:path"; +import { + getSlashCommands as getSlashCommandsFromCwd, + resolveSlashCommand as resolveSlashCommandFromCwd, +} from "@superset/chat/server/desktop"; import { eq } from "drizzle-orm"; -import { createMastraCode } from "mastracode"; +import { createAuthStorage, createMastraCode } from "mastracode"; import type { HostDb } from "../../db"; import { workspaces } from "../../db/schema"; import type { ModelProviderRuntimeResolver } from "../../providers/model-providers"; @@ -275,6 +279,35 @@ function toRuntimeErrorMessage(error: unknown): string { return "Unexpected chat error"; } +/** + * Pick the model mastra should use for observational-memory reflection + * (a background task that runs after each turn). Mastra's default is + * google/gemini-2.5-flash, which fails when GOOGLE_GENERATIVE_AI_API_KEY + * is unset — we fall back to whichever provider the user has actually + * authenticated with so reflection just uses those credentials. 
+ */ +function resolveOmModelFromAuth(): string | undefined { + if (process.env.GOOGLE_GENERATIVE_AI_API_KEY) + return "google/gemini-2.5-flash"; + const authStorage = createAuthStorage(); + authStorage.reload(); + const anthropic = authStorage.get("anthropic"); + if ( + anthropic?.type === "oauth" || + (anthropic?.type === "api_key" && anthropic.key.trim()) + ) { + return "anthropic/claude-haiku-4-5"; + } + const openai = authStorage.get("openai-codex"); + if ( + openai?.type === "oauth" || + (openai?.type === "api_key" && openai.key.trim()) + ) { + return "openai/gpt-4.1-nano"; + } + return undefined; +} + async function getRuntimeMemoryStore( runtime: RuntimeSession, ): Promise { @@ -357,13 +390,18 @@ async function restartRuntimeFromUserMessage( await runtime.harness.sendMessage(input.payload); } +interface InflightRuntimeCreation { + workspaceId: string; + promise: Promise; +} + export class ChatRuntimeManager { private readonly db: HostDb; private readonly runtimeResolver: ModelProviderRuntimeResolver; private readonly runtimes = new Map(); private readonly runtimeCreations = new Map< string, - Promise + InflightRuntimeCreation >(); constructor(options: ChatRuntimeManagerOptions) { @@ -453,9 +491,16 @@ When you need to ask the user ANY question — including simple yes/no, confirma this.ensureGlobalAgentInstructions(); await this.runtimeResolver.prepareRuntimeEnv(); + const omModel = resolveOmModelFromAuth(); const runtime = await createMastraCode({ cwd, disableMcp: true, + ...(omModel && { + initialState: { + observerModelId: omModel, + reflectorModelId: omModel, + }, + }), }); runtime.hookManager?.setSessionId(sessionId); await runtime.harness.init(); @@ -495,24 +540,79 @@ When you need to ask the user ANY question — including simple yes/no, confirma const inflight = this.runtimeCreations.get(sessionId); if (inflight) { - return inflight; + if (inflight.workspaceId !== workspaceId) { + throw new Error( + `Session ${sessionId} is already being created for 
workspace ${inflight.workspaceId}`, + ); + } + return inflight.promise; } - const creation = this.createRuntime(sessionId, workspaceId).finally(() => { + const promise = this.createRuntime(sessionId, workspaceId).finally(() => { this.runtimeCreations.delete(sessionId); }); - this.runtimeCreations.set(sessionId, creation); - return creation; + this.runtimeCreations.set(sessionId, { workspaceId, promise }); + return promise; } - async getDisplayState(input: { - sessionId: string; - workspaceId: string; - }): Promise { - const runtime = await this.getOrCreateRuntime( - input.sessionId, - input.workspaceId, - ); + /** + * Tear down the in-memory runtime for a session. Aborts any in-flight + * work, disconnects MCP servers, removes the runtime from the manager's + * map, and is a no-op for unknown session ids. Should be called after + * the cloud session row is deleted, or when a workspace is deleted. + * + * Validates `workspaceId` against the runtime / in-flight creation so a + * caller can't dispose a session bound to a different workspace. + * + * If a creation is in-flight for this session, awaits it first so the + * just-created runtime doesn't get inserted into `runtimes` after we + * delete from it (which would leak). + */ + async disposeRuntime(sessionId: string, workspaceId: string): Promise { + const inflight = this.runtimeCreations.get(sessionId); + if (inflight) { + if (inflight.workspaceId !== workspaceId) { + throw new Error( + `Session ${sessionId} is being created for workspace ${inflight.workspaceId}`, + ); + } + try { + await inflight.promise; + } catch { + // Creation failed — nothing to dispose. 
+ return; + } + } + + const runtime = this.runtimes.get(sessionId); + if (!runtime) return; + + if (runtime.workspaceId !== workspaceId) { + throw new Error( + `Session ${sessionId} is bound to workspace ${runtime.workspaceId}`, + ); + } + + try { + runtime.harness.abort(); + } catch { + // best-effort — proceed with cleanup even if abort fails + } + try { + await runtime.mcpManager?.disconnect(); + } catch { + // best-effort — MCP servers may already be disconnected + } + this.runtimes.delete(sessionId); + } + + /** + * Shape the harness's raw display state into the shape the renderer + * expects. Both getDisplayState and getSnapshot must apply the same + * shaping — keep this the single source of truth so the two functions + * cannot drift. + */ + private buildDisplayState(runtime: RuntimeSession): ChatDisplayState { const displayState = runtime.harness.getDisplayState(); const currentMessage = displayState.currentMessage as { role?: string; @@ -555,6 +655,17 @@ When you need to ask the user ANY question — including simple yes/no, confirma }; } + async getDisplayState(input: { + sessionId: string; + workspaceId: string; + }): Promise { + const runtime = await this.getOrCreateRuntime( + input.sessionId, + input.workspaceId, + ); + return this.buildDisplayState(runtime); + } + async listMessages(input: { sessionId: string; workspaceId: string; @@ -566,6 +677,36 @@ When you need to ask the user ANY question — including simple yes/no, confirma return runtime.harness.listMessages(); } + /** + * Single server-side observation that returns both displayState and messages + * from one runtime acquisition. This avoids the dual-poll race between + * independent getDisplayState / listMessages queries on the client. + * + * Note: not a fully locked atomic snapshot — listMessages() is async, so + * harness state can change between the displayState read and the messages + * read. 
This still removes the *client-side* two-query race, which is the + * one that caused mismatched message/display state. + */ + async getSnapshot(input: { + sessionId: string; + workspaceId: string; + }): Promise<{ + displayState: ChatDisplayState; + messages: RuntimeMessages; + }> { + const runtime = await this.getOrCreateRuntime( + input.sessionId, + input.workspaceId, + ); + const displayState = this.buildDisplayState(runtime); + const messages = await runtime.harness.listMessages(); + // Intentionally no observedAt: when the harness state hasn't changed, + // the response object is structurally identical to the previous poll's + // response, so React Query's structuralSharing preserves the object + // identity and idle polls don't trigger downstream rerenders. + return { displayState, messages }; + } + async sendMessage( input: ChatSendMessageInput, ): Promise { @@ -645,39 +786,41 @@ When you need to ask the user ANY question — including simple yes/no, confirma return runtime.harness.respondToPlanApproval(input.payload); } - async getSlashCommands(_input: { - sessionId: string; - workspaceId: string; - }): Promise< + private resolveWorkspaceCwd(workspaceId: string): string { + const workspace = this.db.query.workspaces + .findFirst({ where: eq(workspaces.id, workspaceId) }) + .sync(); + if (!workspace) { + throw new Error(`Workspace not found: ${workspaceId}`); + } + return workspace.worktreePath; + } + + async getSlashCommands(input: { workspaceId: string }): Promise< Array<{ name: string; aliases: string[]; description: string; argumentHint: string; - kind: "builtin" | "prompt"; + kind: "builtin" | "custom"; }> > { - return []; + const cwd = this.resolveWorkspaceCwd(input.workspaceId); + return getSlashCommandsFromCwd(cwd).map((command) => ({ + name: command.name, + aliases: command.aliases, + description: command.description, + argumentHint: command.argumentHint, + kind: command.kind, + })); } - async resolveSlashCommand(input: { - sessionId: string; - 
workspaceId: string; - text: string; - }) { - return { - handled: false, - invokedAs: input.text.trim().startsWith("/") - ? input.text.trim() - : undefined, - }; + async resolveSlashCommand(input: { workspaceId: string; text: string }) { + const cwd = this.resolveWorkspaceCwd(input.workspaceId); + return resolveSlashCommandFromCwd(cwd, input.text); } - async previewSlashCommand(input: { - sessionId: string; - workspaceId: string; - text: string; - }) { + async previewSlashCommand(input: { workspaceId: string; text: string }) { return this.resolveSlashCommand(input); } diff --git a/packages/host-service/src/trpc/router/auth/auth.ts b/packages/host-service/src/trpc/router/auth/auth.ts new file mode 100644 index 00000000000..5e45b377abd --- /dev/null +++ b/packages/host-service/src/trpc/router/auth/auth.ts @@ -0,0 +1,83 @@ +import { z } from "zod"; +import { protectedProcedure, router } from "../../index"; + +const anthropicOAuthCodeInput = z.object({ + code: z.string().min(1), +}); +const openAIOAuthCodeInput = z.object({ + code: z.string().optional(), +}); +const anthropicApiKeyInput = z.object({ + apiKey: z.string().min(1), +}); +const openAIApiKeyInput = z.object({ + apiKey: z.string().min(1), +}); +const anthropicEnvConfigInput = z.object({ + envText: z.string(), +}); + +export const authRouter = router({ + getAnthropicStatus: protectedProcedure.query(({ ctx }) => { + return ctx.runtime.auth.getAnthropicAuthStatus(); + }), + startAnthropicOAuth: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.startAnthropicOAuth(); + }), + completeAnthropicOAuth: protectedProcedure + .input(anthropicOAuthCodeInput) + .mutation(({ ctx, input }) => { + return ctx.runtime.auth.completeAnthropicOAuth({ code: input.code }); + }), + cancelAnthropicOAuth: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.cancelAnthropicOAuth(); + }), + disconnectAnthropicOAuth: protectedProcedure.mutation(({ ctx }) => { + return 
ctx.runtime.auth.disconnectAnthropicOAuth(); + }), + setAnthropicApiKey: protectedProcedure + .input(anthropicApiKeyInput) + .mutation(({ ctx, input }) => { + return ctx.runtime.auth.setAnthropicApiKey({ apiKey: input.apiKey }); + }), + clearAnthropicApiKey: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.clearAnthropicApiKey(); + }), + getAnthropicEnvConfig: protectedProcedure.query(({ ctx }) => { + return ctx.runtime.auth.getAnthropicEnvConfig(); + }), + setAnthropicEnvConfig: protectedProcedure + .input(anthropicEnvConfigInput) + .mutation(({ ctx, input }) => { + return ctx.runtime.auth.setAnthropicEnvConfig({ envText: input.envText }); + }), + clearAnthropicEnvConfig: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.clearAnthropicEnvConfig(); + }), + + getOpenAIStatus: protectedProcedure.query(({ ctx }) => { + return ctx.runtime.auth.getOpenAIAuthStatus(); + }), + startOpenAIOAuth: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.startOpenAIOAuth(); + }), + completeOpenAIOAuth: protectedProcedure + .input(openAIOAuthCodeInput) + .mutation(({ ctx, input }) => { + return ctx.runtime.auth.completeOpenAIOAuth({ code: input.code }); + }), + cancelOpenAIOAuth: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.cancelOpenAIOAuth(); + }), + disconnectOpenAIOAuth: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.disconnectOpenAIOAuth(); + }), + setOpenAIApiKey: protectedProcedure + .input(openAIApiKeyInput) + .mutation(({ ctx, input }) => { + return ctx.runtime.auth.setOpenAIApiKey({ apiKey: input.apiKey }); + }), + clearOpenAIApiKey: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.clearOpenAIApiKey(); + }), +}); diff --git a/packages/host-service/src/trpc/router/auth/index.ts b/packages/host-service/src/trpc/router/auth/index.ts new file mode 100644 index 00000000000..4f82de4a3ba --- /dev/null +++ b/packages/host-service/src/trpc/router/auth/index.ts 
@@ -0,0 +1 @@ +export { authRouter } from "./auth"; diff --git a/packages/host-service/src/trpc/router/chat/chat.ts b/packages/host-service/src/trpc/router/chat/chat.ts index 5e639ef0fa5..c6debcbe747 100644 --- a/packages/host-service/src/trpc/router/chat/chat.ts +++ b/packages/host-service/src/trpc/router/chat/chat.ts @@ -8,6 +8,13 @@ const sessionInput = z.object({ workspaceId: z.uuid(), }); +// Slash-command discovery / preview / resolve are workspace-scoped, not +// session-scoped — they only need a workspaceId so they work in fresh +// chats before the first message creates a session. +const workspaceSlashInput = z.object({ + workspaceId: z.uuid(), +}); + const sendMessagePayloadSchema = z.object({ content: z.string(), files: z @@ -41,6 +48,12 @@ export const chatRouter = router({ return ctx.runtime.chat.listMessages(input); }), + getSnapshot: protectedProcedure + .input(sessionInput) + .query(({ ctx, input }) => { + return ctx.runtime.chat.getSnapshot(input); + }), + sendMessage: protectedProcedure .input( sessionInput.extend({ @@ -48,8 +61,22 @@ export const chatRouter = router({ metadata: messageMetadataSchema, }), ) - .mutation(({ ctx, input }) => { - return ctx.runtime.chat.sendMessage(input); + .mutation(async ({ ctx, input }) => { + const result = await ctx.runtime.chat.sendMessage(input); + // Fire-and-forget cloud lastActiveAt update so the session selector + // keeps reordering after activity. Failures here must not block the + // turn — the user already sees their message land via the snapshot. 
+ void ctx.api.chat.updateSession + .mutate({ sessionId: input.sessionId, lastActiveAt: new Date() }) + .catch(() => {}); + return result; + }), + + endSession: protectedProcedure + .input(sessionInput) + .mutation(async ({ ctx, input }) => { + await ctx.runtime.chat.disposeRuntime(input.sessionId, input.workspaceId); + return { ok: true }; }), restartFromMessage: protectedProcedure @@ -110,14 +137,14 @@ export const chatRouter = router({ }), getSlashCommands: protectedProcedure - .input(sessionInput) + .input(workspaceSlashInput) .query(({ ctx, input }) => { return ctx.runtime.chat.getSlashCommands(input); }), resolveSlashCommand: protectedProcedure .input( - sessionInput.extend({ + workspaceSlashInput.extend({ text: z.string(), }), ) @@ -127,7 +154,7 @@ export const chatRouter = router({ previewSlashCommand: protectedProcedure .input( - sessionInput.extend({ + workspaceSlashInput.extend({ text: z.string(), }), ) diff --git a/packages/host-service/src/trpc/router/router.ts b/packages/host-service/src/trpc/router/router.ts index a81ada2949e..fc3991cc7d8 100644 --- a/packages/host-service/src/trpc/router/router.ts +++ b/packages/host-service/src/trpc/router/router.ts @@ -1,4 +1,5 @@ import { router } from "../index"; +import { authRouter } from "./auth"; import { chatRouter } from "./chat"; import { cloudRouter } from "./cloud"; import { filesystemRouter } from "./filesystem"; @@ -16,6 +17,7 @@ import { workspaceCleanupRouter } from "./workspace-cleanup"; import { workspaceCreationRouter } from "./workspace-creation"; export const appRouter = router({ + auth: authRouter, health: healthRouter, host: hostRouter, chat: chatRouter, diff --git a/packages/host-service/src/types.ts b/packages/host-service/src/types.ts index 148b1c696b0..cfd5adb3bb9 100644 --- a/packages/host-service/src/types.ts +++ b/packages/host-service/src/types.ts @@ -1,4 +1,5 @@ import type { Octokit } from "@octokit/rest"; +import type { ChatService } from "@superset/chat/server/desktop"; import 
type { AppRouter } from "@superset/trpc"; import type { TRPCClient } from "@trpc/client"; import type { HostDb } from "./db"; @@ -11,6 +12,7 @@ import type { PullRequestRuntimeManager } from "./runtime/pull-requests"; export type ApiClient = TRPCClient; export interface HostServiceRuntime { + auth: ChatService; chat: ChatRuntimeManager; filesystem: WorkspaceFilesystemManager; pullRequests: PullRequestRuntimeManager; diff --git a/packages/trpc/src/router/chat/chat.ts b/packages/trpc/src/router/chat/chat.ts index 3a4ad94cb2e..632e16359fa 100644 --- a/packages/trpc/src/router/chat/chat.ts +++ b/packages/trpc/src/router/chat/chat.ts @@ -83,6 +83,7 @@ export const chatRouter = { z.object({ sessionId: z.uuid(), title: z.string().optional(), + lastActiveAt: z.date().optional(), }), ) .mutation(async ({ ctx, input }) => { @@ -99,6 +100,9 @@ export const chatRouter = { if (input.title !== undefined) { updates.title = input.title; } + if (input.lastActiveAt !== undefined) { + updates.lastActiveAt = input.lastActiveAt; + } if (Object.keys(updates).length === 0) { return { updated: false }; diff --git a/plans/background-agents-chat-architecture-reference.md b/plans/background-agents-chat-architecture-reference.md new file mode 100644 index 00000000000..e8d64d2dfb5 --- /dev/null +++ b/plans/background-agents-chat-architecture-reference.md @@ -0,0 +1,340 @@ +# Background Agents (Open-Inspect) Chat Architecture — Reference Notes + +Research notes on how [Open-Inspect](./temp/background-agents) implements chat for background coding agents with multiplayer real-time collaboration. Written as background for the v2 chat transport rearchitecture (see `host-service-chat-architecture.md`, `v2-chat-greenfield-architecture.md`, and the companion `t3code-chat-architecture-reference.md` and `opencode-electron-chat-architecture-reference.md`). All paths below are relative to `temp/background-agents/` unless noted. 
+ +## TL;DR + +Open-Inspect (inspired by Ramp's Inspect) is the most directly relevant reference architecture for what we're building. It uses **Cloudflare Durable Objects as the control plane** — one DO per session holding per-session SQLite, a WebSocket hub, and a FIFO prompt queue — and **Modal sandboxes as the execution plane**. Multiple humans can collaborate on one session in real time: every connected client subscribes to the same DO, the DO broadcasts events, and a small participant-presence service keeps everyone aware of who's there. WebSocket hibernation makes thousands of idle sessions cheap. Sessions can spawn child sessions into separate sandboxes. Input can originate from the web UI, Slack, GitHub, Linear, or webhooks — they all converge on the same session DO. This is **exactly the DO-based architecture I proposed as P5 of our plan, already built, production-style.** + +## Architecture diagram + +``` +┌──────────────────────────┐ ┌─────────────────────────────────┐ ┌─────────────────────┐ +│ Clients (many per │ │ Control plane │ │ Execution plane │ +│ session, many types) │ │ Cloudflare Workers + DO │ │ Modal sandbox │ +├──────────────────────────┤ ├─────────────────────────────────┤ ├─────────────────────┤ +│ │ │ │ │ │ +│ Web UI │ │ SessionDO (one per session) │ │ supervisor │ +│ (Next.js + React) │────┐ │ ┌──────────────────────────┐ │ │ ├─ entrypoint.py │ +│ │ │ │ │ per-session SQLite │ │ │ ├─ OpenCode agent │ +│ Slack bot │────┼─▶│ │ session · participants │ │ ┌──▶│ └─ bridge │ +│ │ │ │ │ messages (FIFO queue) │ │ │ │ (WS back) │ +│ GitHub bot (PR hooks) │────┤ │ │ events (indexed stream)│ │ │ │ │ +│ │ │ │ │ artifacts · sandbox │ │ │ │ filesystem: │ +│ Linear bot │────┤ │ │ ws_client_mapping │ │ │ │ workspace + │ +│ │ │ │ └──────────────────────────┘ │ │ │ dev environment │ +│ Webhooks / cron │────┘ │ │ │ │ │ +│ │ │ WebSocket hub (hibernation) │◀──┼───│ emits: │ +│ ────────────────────────│ │ many client WS + │ │ │ token events │ +│ │ │ one 
sandbox WS per session │ │ │ tool-call events│ +│ open a session: │ │ │ │ │ step-finish │ +│ POST /sessions │─────▶ │ HTTP surface: │───┘ │ cost · errors │ +│ (web / bot / hook) │ │ POST /sessions │ │ │ +│ │ │ POST /sessions/:id/ws-token │ │ │ +│ join its stream: │ │ GET /sessions/:id/ws │ │ │ +│ GET /sessions/:id/ws │─────▶ │ POST /sessions/:id/children/* │ │ │ +│ (WS, hibernation tag) │ │ POST /sessions/:id/cancel │ │ │ +│ │ │ GET /sessions/:id/spawn-ctx │ │ │ +│ │ │ │ │ │ +└──────────────────────────┘ └─────────────────────────────────┘ └─────────────────────┘ + │ ▲ + ▼ │ + ┌───────────────────┐ │ + │ Global D1 │ │ + │ session_index │ │ + │ repo_metadata │ │ + │ repo_secrets │ Modal lifecycle: spawn, │ + │ user_scm_tokens │ ready, snapshot, stop, │ + └───────────────────┘ restore-from-snapshot │ + │ + spawn_task (agent-initiated) ─────────────┘ + child session = new SessionDO + new sandbox + + Transports: + • Client ↔ Control plane : HTTPS + WebSocket (with hibernation) + • Control plane ↔ Sandbox: WebSocket (sandbox-auth-token handshake) + • Control plane ↔ D1 : HTTP (global metadata, secrets, session index) + • Sandbox ↔ git remote : HTTPS (GitHub App token, ephemeral) +``` + +### Same thing as a Mermaid diagram + +```mermaid +flowchart LR + subgraph Clients["Clients (many per session)"] + direction TB + Web["Web UI (Next.js)"] + Slack["Slack bot"] + GitHub["GitHub bot"] + Linear["Linear bot"] + Webhooks["Webhooks / cron"] + end + + subgraph ControlPlane["Control plane (Cloudflare Workers + Durable Objects)"] + direction TB + subgraph DO["SessionDO (one per session)"] + direction TB + SQLite[("per-session SQLite
<br/>session · participants ·<br/>messages (FIFO) · events ·<br/>artifacts · sandbox ·<br/>ws_client_mapping")] + WSHub["WebSocket hub<br/>client WSs + sandbox WS<br/>(hibernation)"] + Presence["Presence service"] + Lifecycle["SandboxLifecycleManager<br/>(pure decision fns)"] + end + HTTP["HTTP surface<br/>POST /sessions · /children/* · /cancel<br/>GET /ws · /spawn-context"] + D1[("Global D1<br/>session_index · repo_metadata ·<br/>repo_secrets · user_scm_tokens")] + HTTP --> DO + DO --> D1 + end + + subgraph Exec["Execution plane (Modal)"] + direction TB + Sandbox["Sandbox<br/>supervisor · OpenCode agent · bridge"] + Snap[("Modal Image snapshots<br/>
(filesystem state)")] + Sandbox --> Snap + end + + Web -->|HTTPS + WS| HTTP + Slack --> HTTP + GitHub --> HTTP + Linear --> HTTP + Webhooks --> HTTP + + Web <-.->|WS: sandbox_event · presence · etc| WSHub + WSHub <-.->|WS: sandbox-auth-token| Sandbox + DO -.->|spawn_task creates child DO| DO +``` + +Solid arrows = requests. Dotted arrows = long-lived streaming connections. + +## Topology + +Three explicit tiers and the middle one is a Durable Object: + +1. **Clients** — web, Slack, GitHub, Linear, webhooks. All converge on the same HTTP + WS surface. +2. **Control plane** — Cloudflare Workers routing requests to per-session Durable Objects. Each DO owns one session: its SQLite database, its WebSocket connections, its lifecycle state. Stateless D1 database underneath for global indexes and repo metadata. +3. **Execution plane** — Modal sandboxes. One sandbox per session (or per child session). Runs OpenCode agent inside a real dev environment. Connects back to its session's DO via WebSocket. + +## Packages + +- `shared/` — TypeScript types, auth utilities, session/spawn context shapes. Consumed by everything. +- `control-plane/` — Cloudflare Workers + Durable Objects. The brain. Hosts `SessionDO`. +- `web/` — Next.js 16 + React 19 app. Session UI, OAuth, dashboard, real-time streaming. +- `slack-bot/`, `github-bot/`, `linear-bot/` — Cloudflare Workers (Hono). Translate external events into control-plane HTTP calls. +- `modal-infra/` — Python 3.12 Modal app. Sandbox supervisor + OpenCode runner + bridge that talks to the session DO. +- `sandbox-runtime/` — Python. Shared sandbox utilities. +- `daytona-infra/` — alternative sandbox provider, less used. Modal is the default. +- `terraform/` — IaC for Cloudflare Workers, Vercel, Modal, D1 schema migrations. Production deployment model, not demo-ware. + +## Control plane: Durable Objects + +Every session is a `SessionDO` addressed by session ID. Each DO owns: + +- **Per-session SQLite database** (lives inside the DO). 
Tables (`control-plane/src/session/schema.ts`): + - `session` — repo, branch, model, status (`created | active | completed | failed | archived | cancelled`), cost. + - `participants` — users present in this session with encrypted SCM tokens and `ws_auth_token` hashes. + - `messages` — prompt queue, FIFO by insertion order. + - `events` — sandbox events (tokens, tool calls, step-finish, errors). Indexed on `(created_at, id)` for cursor-paginated reads. + - `artifacts` — PRs, screenshots, branch refs. + - `sandbox` — current sandbox id, status, auth token, snapshot image id. + - `ws_client_mapping` — stable `wsId → participantId` so hibernated WebSockets can be rehydrated. +- **Active WebSocket connections** — many client WSs + one sandbox WS per session. +- **Sandbox lifecycle state machine** — implemented as pure decision functions (`evaluateSpawnDecision`, `evaluateCircuitBreaker`, `evaluateInactivityTimeout`, `evaluateHeartbeatHealth` in `control-plane/src/sandbox/lifecycle/manager.ts`). + +**Why DOs earn their keep here:** + +- Single-threaded per session → no concurrency races inside one session. +- SQLite-backed storage → durable, survivable across deploys, supports range reads. +- WebSocket hibernation → sessions can be idle for hours with zero cost and wake instantly when a message arrives. +- Global addressability → a Slack bot in one Worker can route to the exact DO holding the session. + +**Global state in D1** (regular Cloudflare D1, not per-session): + +- `session_index` — list of all sessions, keyed by user_id, for dashboards. +- `repo_metadata` — descriptions, aliases, Slack channel associations. +- `repo_secrets` — AES-256-GCM encrypted environment variables per repo. +- `user_scm_tokens` — cached OAuth tokens with refresh logic. + +## Transport: WebSocket with hibernation + +One WS per client, plus one WS per sandbox, all terminating at the session's DO. + +**Authentication flow:** + +1. User OAuths against GitHub → gets user id and SCM token. +2. 
Client calls `POST /sessions/:id/ws-token` and receives a 24-hour JWT. +3. Client opens `GET /sessions/:id/ws` WebSocket. +4. Client sends `{ type: "subscribe", token, clientId }` as first message. +5. DO validates the token hash against `participants.ws_auth_token`, looks up the participant, tags the WS with `wsid:<wsId>` via `ctx.acceptWebSocket(ws, [tag])`, records the mapping in `ws_client_mapping`. +6. DO replies with `{ type: "subscribed", sessionId, state, artifacts, participantId, replay? }`. + +After that, the WS is hibernation-eligible — the DO can sleep while the WS stays open. + +**Client → server messages:** + +- `ping` · `subscribe` · `prompt { content, model?, attachments? }` · `stop` · `typing` · `presence { status, cursor? }` + +**Server → client messages:** + +- `pong` · `subscribed { …, replay? }` · `sandbox_event { event }` · `presence_sync` · `presence_update` · `sandbox_spawning | sandbox_ready | sandbox_error` · `artifact_created` · `snapshot_saved` · `session_status` · `child_session_update` · `error` + +**Hibernation recovery.** When the DO wakes after hibernation, it reads `ws_client_mapping` to re-associate each WS with its participant. No client action is required; the client is simply still subscribed. + +## Session model + +**One session = one piece of work tied to a repo.** Sessions are long-lived across client connections — you close your browser, come back tomorrow, the session is still there. + +**Created via:** web (`POST /sessions`), Slack `@mention`, GitHub PR webhook, Linear issue assignment, or automation trigger. All converge on the same creation path that: + +1. Generates session id. +2. Writes to `session_index` (global D1). +3. Creates the `SessionDO` and initializes its per-session SQLite. +4. Inserts the initial prompt into `messages` as `pending`. + +**Status lifecycle:** `created → active → completed | failed | archived | cancelled`. + +**Message queue.** Prompts go into the `messages` table with FIFO order.
The DO processes one at a time. Concurrent `prompt` messages from two users on the same session just queue up — no dropping, no merge conflict. + +## Multiplayer real-time collaboration + +This is what makes Open-Inspect unusually relevant for us. Multiple humans can subscribe to the same session DO and see identical state in real time. + +**Event broadcasting.** When the sandbox emits an event, the DO: + +1. Persists it into the per-session `events` table. +2. Calls `forEachClientSocket("authenticated_only", ws => ws.send({ type: "sandbox_event", event }))`. + +Every connected client sees the same stream in the same order. No per-client state, no reconciliation. + +**Presence.** A `PresenceService` inside the DO maintains the roster of currently connected clients with last-seen timestamps and active/idle status. `presence_sync` on join hands a client the current roster; `presence_update` fans out on changes. + +**Identity.** Each event carries `participantId`, `name`, `avatar`, derived from the `participants` table. Clients render messages with correct attribution regardless of which user typed them. + +**Concurrency.** DOs are single-threaded per session — a Cloudflare platform guarantee. If User A and User B send prompts at the same moment, both hit the DO serially; both inserts go into `messages` in insertion order. The agent processes them one at a time. No CRDTs, no locks, no conflict resolution — the DO's single-threaded invariant does the work. + +**Event replay on reconnect.** The `subscribed` reply to a re-joining client can include an optional `replay: { events, hasMore, cursor }` payload. The client catches up via cursor-paginated history, then joins the live stream. Cursor is `{ timestamp, id }` into the `events` table's index — the same shape any paginator would use. + +**What this gets you.** A user starts a session on desktop, walks away, someone else on a phone opens the same session and sees the full history plus the live tail. 
Both can chime in; both see the other's messages. When the user gets back to desktop, they see both their and their colleague's contributions. No extra plumbing, no conflict resolution, no sync issues. + +## Parallel sub-tasks (`spawn_task`) + +An agent tool lets a running session spawn **child sessions** into separate sandboxes. Implemented as a typed tool the agent can call during a turn. + +**Guardrails:** + +- `MAX_SPAWN_DEPTH = 2` — children can't spawn children (prevents fork bombs). +- `MAX_CONCURRENT_CHILDREN = 5` — at most five running at once. +- `MAX_TOTAL_CHILDREN = 15` — lifetime per parent session. + +**Mechanics:** + +1. Parent agent calls `spawn_task({ title, prompt })`. +2. Control plane creates new session with `parent_session_id`, `spawn_source: "agent"`, `spawn_depth: parent.depth + 1`, inheriting repo/model/owner. +3. Child fetches `GET /sessions/:id/spawn-context` to get parent-owned SCM tokens and model config. +4. Child sandbox spins up, child DO enqueues the prompt, child runs independently. +5. Parent continues its own turn; it does not block on the child. +6. Child posts progress via `POST /sessions/:parent-id/children/:child-id` which the parent DO broadcasts as `child_session_update` to the parent's subscribers. +7. Parent agent can call `get_task_status` and `cancel_task` tools to poll or abort children. +8. Final merge (PRs, file changes) is explicit — the parent decides what to do with children's artifacts. + +This is a non-trivial pattern. No CRDT, no automatic merging — just typed tools, a parent-child graph, and explicit coordination. + +## Sandbox lifecycle (Modal) + +Three startup modes, chosen by the lifecycle manager based on current state: + +- **Fresh start:** spawn container → clone repo → run `.openinspect/setup.sh` → run `.openinspect/start.sh` → agent ready. Slowest (~30-300s). +- **Snapshot restore:** restore filesystem from Modal Image snapshot → `git pull` → run `start.sh` → agent ready. Usually <10s. 
+- **Repo image start:** use pre-built image → incremental `git pull` → `start.sh` → agent ready. Also fast. + +**Snapshots** capture filesystem state and are taken after successful prompts, before inactivity timeout, or on explicit request. Stored as Modal Image IDs referenced from the `sandbox` row in per-session SQLite. + +**Warming on typing.** When a client sends `{ type: "typing" }`, the control plane broadcasts `sandbox_warming` and begins spawning the sandbox speculatively. By the time the actual prompt arrives the sandbox is often ready. Hides cold-start latency. + +**Lifecycle decisions are pure functions.** `evaluateSpawnDecision(state) → decision`, `evaluateCircuitBreaker(state) → decision`, etc., return discriminated-union results; the manager then performs side effects via injected dependencies. Easy to unit-test, easy to reason about. + +## Entry point unification + +All integrations terminate at the same `POST /sessions` + WebSocket surface. The session doesn't know whether a prompt came from Slack or the web — the message goes into the same `messages` queue. + +**Callback notifications** for integrations that need a feedback loop: `CallbackNotificationService` dispatches async tasks (`ctx.waitUntil`) when the agent makes progress — a tool call, a PR creation, a completion — and the Slack/GitHub/Linear bot posts an update back to the original thread/PR/issue. These don't block the session and don't count against the prompt queue. 
+ +## Persistence + +| Data | Location | +|---|---| +| Messages queue + history | Per-session SQLite in DO | +| Events stream | Per-session SQLite in DO, indexed on `(created_at, id)` | +| Participants + encrypted SCM tokens | Per-session SQLite in DO | +| Artifacts (PRs, screenshots) | Per-session SQLite + R2 for large media | +| Sandbox lifecycle state | Per-session SQLite in DO | +| WebSocket hibernation mapping | `ws_client_mapping` in per-session SQLite | +| Global session index | D1 (global) | +| Repo metadata + aliases | D1 | +| Repo secrets (encrypted) | D1 | +| User OAuth tokens (cached) | D1 | + +Everything session-scoped is local to the DO. Everything shared is in D1. No Postgres, no Redis, no broker. Cloudflare's platform primitives carry it. + +## Tool calls, approvals, interrupts + +**Tool calls** emit `tool_call` events: `{ tool, args, callId, status: "running" | "completed" | "error" }`. Broadcast to all clients. No server-side approval step — tools execute immediately. + +**Interrupts.** Client sends `{ type: "stop" }`. DO closes the sandbox WS, marks sandbox `stopped`, sets session status `cancelled`, broadcasts `session_status`. + +**Approvals.** Not implemented. Would require adding a `pending` tool-call state with a client-originated approval message and a sandbox-side block. Achievable but not present. + +## Auth / single-tenant model + +Open-Inspect is **single-tenant by design** — "all users are trusted members of the same organization." One shared GitHub App installation per deployment, no per-user repo access validation, no tenant isolation. + +**Token types:** + +- **GitHub App token** (shared, ephemeral) — clone and push from sandbox. +- **User OAuth token** (per user) — PR creation and attribution. AES-256-GCM encrypted at rest in `participants`. +- **Sandbox auth token** — one per session; sandbox uses it to prove itself to the session DO. +- **WebSocket JWT** — one per client-session pair, 24h TTL. 
+ +**Why single-tenant?** The shared GitHub App model is the architectural shortcut that makes collaboration easy. Multi-tenant would require per-tenant GitHub App installations, access validation on session creation, and tenant isolation in the data model — none of which is here. + +## Noteworthy patterns worth stealing + +- **Durable Object per session as the whole "one owner per session + durable state + multi-subscriber fan-out" primitive in one building block.** This is the P5 architecture in my v2-chat plan, already built. No Postgres event table, no in-process pubsub, no LISTEN/NOTIFY — DOs give it to you. +- **WebSocket hibernation with `ws_client_mapping`.** Idle sessions cost nothing; reconnects are seamless; multi-device works without any special code. +- **Cursor-based event replay** (`{ timestamp, id }` paginator over the events table). Simple, indexed, works for any subscriber joining late. +- **FIFO prompt queue in SQL.** Concurrent prompts from multiple users queue cleanly; no race, no drop. +- **Pure decision functions for lifecycle.** Testable, easy to reason about, decoupled from I/O. +- **Presence as a first-class thing.** Not just "who's connected" but also status (active/idle), last-seen, role. Worth copying the shape if we ever add presence. +- **Speculative sandbox warming on typing.** Cheap UX win that hides a few hundred ms of cold-start every turn. +- **Entry-point unification at the HTTP layer.** One `POST /sessions`, many producers. Keeps the bot implementations skinny. +- **Pure-tool model for sub-task spawning.** The agent gets `spawn_task` / `get_task_status` / `cancel_task` as regular typed tools; parallelism is an agent-level concern, not an infrastructure one. + +## Things that are fragile (or we'd do differently) + +- **DO storage size cap.** SQLite per DO is 10 GB today. Per-session is fine but a very chatty session could bump into it. No eviction story visible. 
+- **24 h WS JWT with no auto-rotation.** Close code 4001 on expiry, and the web client's retry logic isn't obvious from the code — if a user leaves a tab open overnight, they likely have to re-auth. +- **D1 as the global session-index bottleneck.** SQLite under the hood; fine at low-to-mid scale, but high concurrent session creation could hit contention. Not a problem we'll face any time soon. +- **No approval flow.** Auto-approve-everything is a deliberate choice given sandboxes are ephemeral and scoped, but it means there's no building block for "ask the user before running this shell command." +- **Single-tenant assumption baked in.** Shared GitHub App + no per-user access validation is the explicit design. Fine for Ramp-shaped internal tools, not fine for a product with external users. +- **Snapshot failures are silent.** If a Modal snapshot fails, the next session pays a cold-start cost and nobody tells you. +- **Events are lost on session archival/deletion.** No long-term archival to object storage beyond media artifacts. If "audit this session from six months ago" becomes a requirement, we'd need to add it. +- **Sandbox runs OpenCode, period.** No provider abstraction inside the sandbox. If we ever want Claude Code or Codex inside Modal, it's another implementation in `modal-infra`, not a swap. + +## Signal for our rearchitecture + +Ranked by direct relevance: + +1. **This is the DO-based P5 we were sketching, already running.** Same shape — one DO per session, per-session SQLite, WebSocket hub with hibernation, events table with cursor replay. The exact things I said DOs give you (single-threaded ordering, built-in fan-out, durable storage per session, cheap idle cost) are the exact things this uses. +2. **Multi-subscriber real-time is solved by "subscribe every client to the same DO and broadcast events."** No custom broker. If we ever want multi-user collaboration in a Superset workspace, the pattern transfers directly. +3. 
**Entry-point unification** — `POST /sessions` from web or a bot lands in the same session. For Superset this maps onto "session is a workspace thing, reachable from any client" — web, mobile, a hypothetical Slack bot — without special per-integration logic. +4. **Hibernation as a cost story.** Idle chat sessions are free (no running server). Important if we want to keep long-lived history accessible. +5. **FIFO prompt queue in SQL as the concurrency primitive.** Concurrent `sendMessage` from two devices? Both insert into the same table, agent picks up one at a time, no race. Much simpler than in-process per-session async queues. +6. **Cursor replay over a timestamped events table.** Same idea as our `replayEvents(fromSeq)` RPC — subtly different (timestamp+id pair vs monotonic seq). Their version has the advantage of not needing a separate counter; ours has the advantage that gap detection is a subtraction. +7. **Separation of control plane and execution plane.** Cloudflare DO for state + WebSocket, Modal for the sandbox. If we move toward cloud runtime, this split is the right shape: state layer stays tight and durable, execution layer is wherever the agent happens to run. + +Things **not** to take directly: + +- **Single-tenant shortcut.** Fine for them, not a fit for us. +- **Auto-approve everything.** We want disciplined typed approvals like t3code's. +- **OpenCode-only in the sandbox.** We have our own harness (Mastracode) and shouldn't replace it. +- **Cloudflare ecosystem lock-in** — a real cost to weigh even when adopting the *pattern*. Implementing the same shape over Node + Postgres is achievable; it's just more code than DOs give you for free. + +The bigger meta-signal: **everything we've been sketching about cloud runtime + multi-device + multi-subscriber already has a concrete, running reference implementation here.** If we get to P5 of our plan and the decision is "Cloudflare DOs vs. 
rolling our own on Postgres," this repo is the argument for Cloudflare. It's not a prototype — it's a real system with bot integrations, parallel sub-tasks, presence, snapshots, and a Terraform deploy story. diff --git a/plans/opencode-electron-chat-architecture-reference.md b/plans/opencode-electron-chat-architecture-reference.md new file mode 100644 index 00000000000..6f91a0edbad --- /dev/null +++ b/plans/opencode-electron-chat-architecture-reference.md @@ -0,0 +1,298 @@ +# OpenCode (Electron) Chat Architecture — Reference Notes + +Research notes on how [OpenCode](./temp/opencode) wires chat inside its Electron desktop app. Written as background for the v2 chat transport rearchitecture (see `host-service-chat-architecture.md`, `chat-mastra-rebuild-execplan.md`, and the companion `t3code-chat-architecture-reference.md`). All paths below are relative to `temp/opencode/` unless noted. + +## TL;DR + +OpenCode runs its agent runtime **in the same Node process as the Electron main process**. On startup, main calls `spawnLocalServer()` which binds a Hono HTTP server to `127.0.0.1:<port>` (OS-assigned port) with HTTP Basic auth (username `opencode`, password = a UUID generated per app launch). The renderer talks to that server over plain HTTP + **Server-Sent Events** — not IPC, not WebSockets. Writes are REST; reads are a single long-lived SSE subscription to a global event bus. Client state is SolidJS + a lightweight event bus; no Redux/Zustand. Partial assistant output streams as **`message.part.delta`** events (field + delta string) that the client applies token-by-token, coalesced to one flush per animation frame. 
+ +## Architecture diagram + +``` +┌────────────────────────── Electron (one Node / V8 process) ──────────────────────────┐ +│ │ +│ ┌─────────────── MAIN ─────────────┐ ┌────────── RENDERER ──────────┐ │ +│ │ │ │ │ │ +│ │ preload/index.ts │ │ SolidJS app │ │ +│ │ └─ contextBridge "api": │ │ │ │ +│ │ awaitInitialization() ────┼─ IPC ────▶ │ window.api │ │ +│ │ storeGet / storeSet │ (bootstrap│ { url, username, password } │ │ +│ │ dialogs, killSidecar │ only) │ │ │ │ +│ │ │ │ ▼ │ │ +│ │ spawnLocalServer() │ ◀── REST ─│ global-sdk.tsx │ │ +│ │ └─ Hono @ 127.0.0.1: │ ── SSE ──▶│ SSE consumer │ │ +│ │ basic auth: opencode / UUID │ │ per-frame coalescer (~16ms)│ │ +│ │ CORS: oc://renderer │ │ │ │ │ +│ │ │ │ │ ▼ │ │ +│ │ ▼ │ │ Solid event bus │ │ +│ │ Effect runtime (in-proc) │ │ (keyed by directory) │ │ +│ │ └─ Bus (PubSub, fan-out) │ │ │ │ │ +│ │ SessionPrompt.Service │ │ ▼ │ │ +│ │ Permission.Service │ │ Per-view reactive stores │ │ +│ │ ToolRegistry.Service │ │ (SolidJS createStore) │ │ +│ │ LLM (Vercel AI SDK) │ │ │ │ │ +│ │ │ │ │ ▼ │ │ +│ │ ▼ │ │ SolidJS components │ │ +│ │ SQLite (Drizzle) │ │ │ │ +│ │ └─ opencode.db │ │ │ │ +│ │ MessageTable · PartTable · │ │ │ │ +│ │ SessionTable │ │ │ │ +│ │ │ │ │ │ +│ └──────────────────────────────────┘ └──────────────────────────────┘ │ +│ │ +│ ───────────────── transport between the two halves (loopback) ───────────────── │ +│ │ +│ (1) REST (write path) POST /session/:sessionID/message │ +│ body: PromptInput │ +│ returns: MessageV2.WithParts (final turn, synchronous) │ +│ │ +│ (2) SSE (read path) GET /event (10s server heartbeat, 15s client timeout) │ +│ data: { type: "message.part.updated", properties: {part}} │ +│ data: { type: "message.part.delta", │ +│ properties: { partID, field, delta } } │ +│ data: { type: "session.updated", ... } │ +│ data: { type: "lsp.updated", ... } │ +│ │ +└──────────────────────────────────────────────────────────────────────────────────────┘ +``` + +Two things to notice. 
First, there is no process boundary between `spawnLocalServer()` and the rest of main — the "server" is just another Effect layer inside the same V8 isolate, not a child process or a separate binary. Second, the renderer never uses IPC for chat: bootstrap goes over `contextBridge`, but all message traffic rides the loopback HTTP interface with the basic-auth credentials handed to it at startup. + +### Same thing as a Mermaid diagram + +```mermaid +flowchart LR + subgraph Proc["Electron app (main process + renderer process)"] + direction LR + + subgraph Main["Main (one Node / V8 process, hosts the server)"] + direction TB + Preload["preload/index.ts
contextBridge 'api'
awaitInitialization · storeGet/Set ·
dialogs · killSidecar"] + Server["spawnLocalServer()
Hono @ 127.0.0.1:rnd port
basic auth: opencode / UUID
CORS: oc://renderer"] + subgraph Runtime["Effect runtime (in-proc)"] + direction TB + Bus["Bus (PubSub, fan-out to SSE)"] + SP["SessionPrompt.Service"] + Perm["Permission.Service"] + Tools["ToolRegistry.Service"] + LLM["LLM (Vercel AI SDK)"] + end + DB[("SQLite (Drizzle)
opencode.db
MessageTable · PartTable ·
SessionTable")] + end + + subgraph Render["Renderer (SolidJS)"] + direction TB + WinApi["window.api
{ url, username, password }"] + Sdk["global-sdk.tsx
SSE consumer +
per-frame coalescer (~16ms)"] + EB["Solid event bus
(keyed by directory)"] + Stores["Per-view reactive stores
(createStore)"] + UI["SolidJS components"] + end + end + + Preload -. "IPC (bootstrap only)" .-> WinApi + WinApi --> Sdk + Sdk -->|"(1) REST  POST /session/:id/message"| Server + Server -.->|"(2) SSE  GET /event
message.part.updated
message.part.delta { field, delta }"| Sdk + Sdk --> EB + EB --> Stores + Stores --> UI + + Server --> SP + SP --> LLM + SP --> Tools + Tools --> Perm + SP --> Bus + SP --> DB + Bus -.-> Server +``` + +Solid arrows = request / command direction. Dotted arrows = server-pushed events or IPC bootstrap. The renderer's only IPC call is to pick up `{ url, username, password }`; everything chat-shaped rides REST and SSE over loopback. + +## Topology + +Three tiers, but only two processes: + +1. **Electron main** — `packages/desktop-electron/src/main/index.ts`. Manages windows, lifecycle, IPC. Also hosts the agent server *in-process*. +2. **Sidecar server** — `packages/desktop-electron/src/main/server.ts::spawnLocalServer()` imports the Hono server from a virtual module (`virtual:opencode-server`) and calls `Server.listen()`. Not a child process; same Node runtime as main. +3. **Renderer** — `packages/desktop-electron/src/renderer/index.tsx`. SolidJS app. Talks to the sidecar over HTTP with credentials from the preload bridge. + +Boundaries: + +- `packages/opencode/src/server/server.ts` — the Hono server definition. +- `packages/desktop-electron/src/preload/index.ts` — `contextBridge.exposeInMainWorld("api", …)`. The only IPC surface is *bootstrap* (`awaitInitialization` returns `{ url, username, password }`, plus store get/set, dialogs, clipboard, `killSidecar`). Chat itself never goes through IPC. + +Port is chosen dynamically (TCP port 0, OS picks). CORS on the server is restricted to the custom scheme `oc://renderer`. Main waits on `GET /global/health` in a ~100 ms poll loop before allowing the renderer to finish initialization (`main/index.ts` ~145-196). + +## Transport + +Two channels: + +- **REST (request/response)** — session CRUD, message listing, metadata, and *sending* a user message. The key endpoint is `POST /session/:sessionID/message` (`packages/opencode/src/server/routes/instance/session.ts` ~846-891). 
It validates via Zod, calls `SessionPrompt.Service.prompt()`, and returns the final `MessageV2.WithParts` object. The handler *does* use Hono's `stream()` helper, but only calls `stream.write(JSON.stringify(msg))` once, after the turn completes — so functionally it behaves like a non-streamed JSON body. Real-time updates come over SSE, not over this stream. + +- **SSE (server → client push)** — one global event stream at `GET /event` (`packages/opencode/src/server/routes/instance/event.ts`). Started once on app init in `packages/app/src/context/global-sdk.tsx` (~140). All state changes — message parts created/updated, session status, LSP, etc. — are published to the internal `Bus` and flushed to every connected SSE client. + +SSE headers worth noting (`event.ts` ~36-38): + +``` +Cache-Control: no-cache, no-transform +X-Accel-Buffering: no +``` + +A 10 s server-side heartbeat (~51-58) keeps proxies from killing idle connections. Client treats >15 s of silence (`global-sdk.tsx` ~111) as dead and reconnects. + +Effectively: **writes are REST, reads are one always-on SSE pipe**. There is no WebSocket, no tRPC, no polling. + +## Server runtime + +Built on **Effect** (Effect-ts) plus the **Vercel AI SDK**. Not Mastra, not a bespoke harness loop in the style of `packages/chat` in our repo. + +Key services (all Effect Layers): + +- `SessionPrompt.Service` — `packages/opencode/src/session/prompt.ts` (~80+). Exposes `prompt(input)` for a one-shot user→AI turn, `loop(input)` for the agentic multi-step loop, and defers cancellation to `SessionRunState.Service`. +- `ToolRegistry.Service` — tool definitions and dispatch. +- `Permission.Service` — gatekeeper for tool execution; emits events when approval is needed. +- `LLM` — wraps Vercel AI SDK for model calls. +- `Bus` — `packages/opencode/src/bus/index.ts`. A layer-based PubSub: one unbounded Effect `PubSub` per event type plus a wildcard channel. Every state change goes through it, and the SSE route subscribes to all. 
+ +The flow from `POST /session/.../message` is: parse → resolve session → hydrate history from SQLite → kick `SessionPrompt.prompt()` → Effect runtime drives the agent loop → each state change publishes events on the `Bus` → SSE fan-out pushes them to every connected renderer → server finally returns the terminal message on the REST response. + +## Message / event model + +The canonical shape is `MessageV2` (`packages/opencode/src/session/message-v2.ts`): + +- Each message has `.info` (metadata) and `.parts[]`. +- Part kinds (the `type` discriminator on each part): `text`, `reasoning`, `file`, `agent`, `compaction`, `subtask`, `retry`, `step-start`, `step-finish`, `tool`, `snapshot`, `patch`. Tool calls and their results live on the single `tool` part, not a separate `tool_result` kind — the result is carried in a nested `state` field whose shape changes as the tool runs. + +Two event types carry updates: + +- **`message.part.updated`** — full `Part` object. Used for part creation, tool results, and final snapshots. +- **`message.part.delta`** (~602-611): + ```ts + { + type: "message.part.delta", + properties: { sessionID, messageID, partID, field, delta } + } + ``` + `field` is the part field being updated (typically `"text"`); `delta` is the string to append. This is how token streaming is expressed — not full text replacement, not a unified diff. Just `append(delta)` into `part[field]`. + +A typical assistant turn looks like: + +``` +message.part.updated { part: { id, type: "text", text: "" } } +message.part.delta { field: "text", delta: "Hello" } +message.part.delta { field: "text", delta: " world" } +message.part.updated { part: { id, type: "text", text: "Hello world" } } +``` + +No global sequence numbers, no per-session counters. SSE ordering is the ordering guarantee. There is no `replayEvents` RPC and no gap detection on the client. + +## Client state: SolidJS + event bus + +No Redux, no Zustand, no central store. 
The renderer uses: + +- **SolidJS** fine-grained reactivity with `createStore` for reactive objects. +- **Solid Query** (`@tanstack/solid-query`) for REST fetches (session lists, history hydration). +- **`@solid-primitives/event-bus`** for the event stream: `event.on(directory, listener)` / `event.listen(directory)` exposed from `global-sdk.tsx`. + +On app init: + +1. `useServer()` picks an active server (there can be several — the same renderer can attach to multiple). +2. `useGlobalSDK()` builds the SDK client and starts the SSE subscription. +3. SSE events are dispatched onto the event bus keyed by `directory` (project/worktree). +4. Session views subscribe to the bus for their directory and mutate reactive stores accordingly. +5. SolidJS picks up the reactive read and re-renders only the affected DOM nodes. + +The pattern is unusual but coherent: each UI screen is its own little reducer over the event stream, keeping its own local reactive store. There is no global chat state object. + +### Event coalescing + +`global-sdk.tsx` (~54-95) batches incoming events by key and flushes once per animation frame (~16 ms). For a given `partID` field, intermediate deltas may be discarded if a full `message.part.updated` arrives before flush (~170-172). This keeps the UI smooth under high-frequency LLM token streams but is a place where an overeager renderer will miss intermediate states. + +## Send flow, end-to-end + +1. **Compose.** SolidJS composer captures text, attachments, context. +2. **REST call.** `sdk.session.prompt({ sessionID, parts: [{ type: "text", text: "hello" }] })` → `POST /session/{sessionID}/message`. +3. **Server accepts.** `session.ts` handler validates, loads history from SQLite, calls `SessionPrompt.Service.prompt()`. +4. **Agent loop runs.** `LLM.stream()` via the Vercel AI SDK drives token generation. For each incremental chunk, the loop publishes `PartDelta` to the `Bus`. Tool execution publishes `PartUpdated` events with tool-result parts. +5. 
**Fan-out.** The SSE route (`/event`) is already subscribed to the Bus; each event is serialized as `data: {...}\n\n`. +6. **Renderer consumes.** A `for await (const event of events.stream)` loop in `global-sdk.tsx` dispatches to the Solid event bus. +7. **Local reducers.** Page-level stores (session view, message list) update themselves reactively. +8. **SolidJS renders.** Fine-grained reactivity means only the affected DOM node updates. +9. **REST returns.** The original `POST /message` resolves with the final `MessageV2.WithParts`. The UI usually already reflects this state from the SSE stream by the time REST returns. + +Note the ordering: the SSE stream is often *ahead of* the REST response. The REST call is effectively a synchronous "start this and wait for completion" with the real-time updates coming out-of-band. + +## Tool approvals and interrupts + +Approval *requests* ride the SSE bus; approval *replies* are a dedicated typed REST endpoint. + +- When a tool is about to run, the agent loop consults `Permission.Service`. If the user hasn't allowed this tool, a permission event is published to the Bus and the agent loop blocks on the Effect primitive waiting for the user's response. +- Client sees the event via SSE, shows inline UI or a dialog, and submits the reply via `POST /permission/:requestID/reply` (`packages/opencode/src/server/routes/instance/permission.ts`, operationId `permission.reply`). Body shape: `{ reply: Permission.Reply, message?: string }`. `Permission.Service.reply({ requestID, reply, message })` resolves the blocked Effect. +- `GET /permission` (operationId `permission.list`) lets the client enumerate all pending approvals across sessions. +- Cancellation: `SessionRunState.Service.cancel(sessionID)` flips a cancellation flag; the agent loop checks it at natural boundaries and exits. The renderer calls this via the session API. 
+ +So the approval protocol is actually quite disciplined — it's a typed `(requestID, reply)` request/response, just carried on the REST side rather than in a single orchestration command stream like t3code's `thread.approval.respond`. + +## Persistence + +SQLite via Drizzle ORM. + +- Drizzle schema at `packages/opencode/src/session/session.sql.ts`. SQL table names are lowercase (`session`, `message`, `part`); the TS exports are `SessionTable`, `MessageTable`, `PartTable`. +- `MessageTable` columns: `id`, `session_id`, `data` (JSON, typed as `InfoData` — role/streaming/etc. live inside the JSON blob), plus `Timestamps`. +- `PartTable` columns: `id`, `message_id`, `session_id`, `data` (JSON, typed as `PartData` — the part kind lives inside the JSON blob), plus `Timestamps`. +- `SessionTable` carries richer metadata: `id`, `project_id`, `workspace_id`, `parent_id`, `slug`, `directory`, `title`, `version`, `share_url`, summary counters, `revert` snapshot, permission ruleset, plus `Timestamps`. +- Path: `$XDG_DATA_HOME/opencode/opencode.db` (defaults under `~/.local/share/opencode/`). +- On app start, main ensures the file exists; `JsonMigration.run()` initializes or upgrades the schema (`main/index.ts` ~140-217). +- History is loaded on demand: `Session.list()` for the sidebar, `Session.get(sessionID)` hydrates a session with all its parts joined. + +No event log, no projections, no replay. Once a `message.part.updated` commits, the prior deltas are forgotten. Deltas in flight during a crash are lost — the REST response is the commit boundary, not the individual SSE events. + +## Reconnect and resumability + +Minimal. The client simply reopens the SSE stream on failure: + +- Retry loop with 250 ms delay (`global-sdk.tsx` ~201). +- 15 s heartbeat timeout; if no server event in that window the client aborts and reconnects. +- On reconnect, the client re-fetches message history via REST — there is no "resume from sequence N" mechanism. 
If the server crashed mid-turn, the turn is just gone. + +This works because SQLite is the source of truth for completed messages and because users tolerate the occasional lost in-flight turn. It would not work in a collaborative setting where multiple clients share a session and need identical state. + +## Electron-specific wiring + +- **Preload** (`src/preload/index.ts`) exposes a minimal `api` via `contextBridge`. All chat traffic bypasses IPC; only bootstrap, settings, and platform features (dialogs, clipboard, notifications) cross the bridge. +- **Sidecar boot** (`src/main/server.ts::spawnLocalServer`, `src/main/index.ts` ~44-50): + - Random port via `getSidecarPort()` (OS-assigned). + - Server instance lives in the same V8 isolate as Electron main. + - Password: random UUID generated per launch; handed to renderer via preload. + - Health check: poll `GET /global/health` every ~100 ms until 200. +- **Custom protocol**: `oc://renderer` is registered and used as the sole allowed CORS origin. `opencode://` deep links are also registered for file associations and external launch URLs (`main/index.ts` ~114). +- **Shutdown**: `killSidecar` IPC handler triggers a graceful server stop on app quit. + +## Noteworthy patterns + +- **In-process sidecar.** Zero IPC overhead; the "server" is just another Effect layer running in the same process. Radically simple to deploy and reason about. Tradeoff: crash in main kills the runtime; no independent restart. +- **HTTP + single global SSE pipe.** One durable subscription for *all* state, filtered on the client by session/directory. Much simpler than per-resource subscriptions. +- **Delta events with a `field` selector.** Token streaming expressed as `append(delta)` into a named field on a part, rather than as token objects or diffs. The client's apply function is three lines. +- **Per-frame event coalescing (`~16 ms`).** Caps render cost regardless of server output rate. 
+- **Custom URL scheme as CORS origin.** Keeps the sidecar inaccessible from stray `http://localhost` origins even if the port leaked. +- **Effect-first codebase.** Services, DI, and error handling are all Effect; the same patterns compose from REST handlers down into LLM calls. + +## Things that are fragile (or that we'd do differently) + +- **No sequence numbers.** SSE ordering is the only ordering guarantee. A dropped event between heartbeats is silently lost; the client only self-heals by re-fetching history. For a local single-user app this is fine; for anything with multiple clients or mobile backgrounding it isn't. +- **No replay protocol.** Once the connection drops you refetch the whole session. Fine for small histories, rough for long agent sessions with many parts. +- **REST body is the final message.** The "send" call blocks until the turn completes. Any UI that doesn't already consume the SSE stream will look frozen during long turns. +- **Random-port + password in memory.** Elegant but means every app relaunch invalidates the credentials (both the port and the UUID password are regenerated per launch); everything downstream has to refetch them through the preload bridge. No way to share a session with a second client process. +- **Delta coalescing can drop intermediate states.** If a full `message.part.updated` lands before the frame flush, pending deltas for that part are discarded. Usually what you want; occasionally surprising when debugging. +- **Approval transport is split across two channels.** Request events come over SSE; replies go over a separate REST endpoint (`POST /permission/:requestID/reply`). It's fully typed — not ad-hoc — but a client has to wire both sides independently, unlike t3code where request and reply are both commands/events on the same orchestration stream. + +## Signal for our rearchitecture + +Ranked by relevance to our current problem: + +1. **Single global event stream, scoped on the client.** Similar to t3code's shell-plus-detail split. 
For our workspace-scoped chat, this would become one host-service subscription the client routes by session/workspace. This is the transport shape we probably want. +2. **Delta-as-append.** `{ field, delta }` is dead simple and avoids both token-object complexity and unified-diff complexity. Compare to t3code's unified-diff stream — OpenCode's is considerably cheaper to implement. +3. **REST write + SSE read.** If we don't want tRPC subscriptions on WebSockets, this is a viable alternative: keep request/response calls as plain tRPC queries/mutations and open one SSE endpoint for events. Host-service already has Hono; adding an SSE route is trivial. +4. **In-process sidecar architecture.** Close cousin of our host-service direction. Ours is explicitly a separate process for reasons (multi-client, mobile, web parity), but the *ownership* story is the same: one runtime, multiple subscribers. +5. **What NOT to take.** The lack of sequence numbers and replay. This is the main thing t3code does better and the main thing we need given our desktop/web/mobile story and reconnect requirements. diff --git a/plans/t3code-chat-architecture-reference.md b/plans/t3code-chat-architecture-reference.md new file mode 100644 index 00000000000..7c5da26975f --- /dev/null +++ b/plans/t3code-chat-architecture-reference.md @@ -0,0 +1,379 @@ +# T3 Code Chat Architecture — Reference Notes + +Research notes on how [T3 Code](./temp/t3code) implements its chat system. Written as background for the v2 chat transport rearchitecture (see `host-service-chat-architecture.md` and `chat-mastra-rebuild-execplan.md`). All file paths below are relative to `temp/t3code/` unless noted. + +## TL;DR + +T3 Code is **event-sourced**. The server owns an append-only, sequence-numbered event log in SQLite. Clients connect via an **Effect-native WebSocket RPC** (not tRPC), open two long-lived subscriptions, and apply each event to a **Zustand store**. 
User actions are commands that dispatch through the same RPC; their effects come back as events. On reconnect, clients detect sequence gaps and call `replayEvents(from, to)` to catch up. There is no polling. + +## Architecture diagram + +```mermaid +flowchart LR + subgraph Client["Client (web / electron renderer, React 19 + Zustand)"] + direction TB + UI["React components"] + Store["Zustand store
threadShell · messages ·
activities · turnDiffs"] + Recovery["Recovery coordinator
tracks latestSequence /
highestObservedSequence"] + UI <--> Store + Recovery -.-> Store + end + + subgraph RPC["Effect RPC over WebSocket (/ws)"] + direction TB + Cmd["dispatchCommand (req/res)"] + Replay["replayEvents(from, to) (req/res)"] + Shell["subscribeShell (stream)"] + Detail["subscribeThread(id) (stream)"] + end + + subgraph Server["apps/server (Node + Effect)"] + direction TB + Engine["OrchestrationEngine"] + Dedup["CommandReceipts
(idempotency by commandId)"] + Log[("SQLite event log
append-only · seq-numbered")] + Proj[("Projections
messages · activities ·
approvals · sessions · turns")] + Ingest["ProviderRuntime
Ingestion reactor"] + subgraph Providers["ProviderService
ProviderSessionStatus:
connecting · ready · running · error · closed"] + direction LR + Claude["ClaudeAdapter"] + Codex["CodexAdapter"] + Cursor["CursorAdapter"] + Op["OpenCodeAdapter"] + end + Engine --> Dedup + Engine --> Log + Log --> Proj + Engine --> Providers + Providers --> Ingest + Ingest --> Log + end + + Store -->|writes: commands| Cmd + Cmd --> Engine + Recovery -->|gap detected| Replay + Replay --> Log + Shell --> Store + Detail --> Store + Log -.->|fan-out| Shell + Log -.->|fan-out| Detail + Proj -.->|initial snapshot| Shell + Proj -.->|initial snapshot| Detail +``` + +Solid arrows = request / command direction. Dotted arrows = server-pushed events or snapshots. Note the two **independent** subscription streams (`subscribeShell` and `subscribeThread`): they both read from the same event log but the client treats them as disjoint writers into different regions of the store. + +### Same thing in plain text + +``` +┌──────────────── CLIENT (apps/web, apps/desktop renderer) ────────────────┐ +│ │ +│ React components │ +│ ▲ │ +│ │ │ +│ Zustand store (partitioned by which stream writes it) │ +│ ├─ threadShellById / sidebarThreadSummaryById ◀─ shell stream only │ +│ ├─ messageByThreadId / messageIdsByThreadId ◀─ detail stream only │ +│ ├─ activityIdsByThreadId ◀─ detail stream only │ +│ └─ turnDiffSummaryByThreadId ◀─ detail stream only │ +│ ▲ │ +│ │ │ +│ reducers: │ +│ applyEnvironmentOrchestrationEvent(state, event, env) per-event │ +│ syncServerThreadDetail(state, thread, env) full snapshot │ +│ ▲ │ +│ │ │ +│ createOrchestrationRecoveryCoordinator(...) 
│ +│ latestSequence · highestObservedSequence │ +│ classify each event: ignore / defer / recover / apply │ +│ │ +└──────┬────────────────────────────────────────────────────────────▲───────┘ + │ │ + │ commands events │ + ▼ │ + ┌─────────────── Effect RPC over WebSocket (/ws) ─────────────────────┐ + │ │ + │ (1) dispatchCommand req/res client → server │ + │ (2) replayEvents(from, to) req/res client → server (on gap) │ + │ (3) subscribeShell stream server → client (1 per conn) │ + │ (4) subscribeThread(id) stream server → client (per open) │ + │ │ + └────┬─────────────────────────────────────────────────────────▲───────┘ + │ │ + ▼ │ +┌──────────────────── SERVER (apps/server, Node + Effect) ──────────────────┐ +│ │ +│ ws.ts (Effect RPC handler) │ +│ │ │ +│ ▼ │ +│ OrchestrationEngine │ +│ ├─ CommandReceipts (idempotency by commandId) │ +│ │ │ +│ ├──▶ SQLite event log (append-only, global monotonic `sequence`) │ +│ │ ├─ thread.message-sent { streaming: bool } │ +│ │ ├─ thread.turn-start-requested │ +│ │ ├─ thread.turn-diff-completed { diff: unified diff } │ +│ │ ├─ thread.activity-appended (tool calls, errors, …) │ +│ │ ├─ thread.approval-response-requested │ +│ │ ├─ thread.user-input-response-requested │ +│ │ ├─ thread.session-set · thread.reverted │ +│ │ └─ project.* │ +│ │ │ │ +│ │ ▼ │ +│ │ Projections (materialized reads in SQLite) │ +│ │ projection_thread_messages · …_activities │ +│ │ projection_pending_approvals · …_thread_sessions │ +│ │ projection_thread_turns │ +│ │ │ +│ └──▶ ProviderService (per-thread ProviderSessionStatus: │ +│ connecting · ready · running · error · closed)│ +│ ├─ ClaudeAdapter │ +│ ├─ CodexAdapter │ +│ ├─ CursorAdapter │ +│ └─ OpenCodeAdapter │ +│ │ │ +│ ▼ │ +│ ProviderRuntimeIngestion │ +│ translates ProviderRuntimeEvent → orchestration events │ +│ and appends them back into the event log │ +│ │ +└────────────────────────────────────────────────────────────────────────────┘ +``` + +## Topology + +- `apps/server` — Node.js + Effect 
process. Single source of truth. +- `apps/web` — Vite/React client. +- `apps/desktop` — Electron shell around the web app. +- `packages/contracts` — shared schemas (events, commands, errors). +- `packages/client-runtime` — shared client wiring. + +Both clients (web + desktop renderer) talk to the server the same way: one WebSocket to `/ws`. The server can back any number of concurrent clients on the same environment. + +## Transport: Effect RPC over WebSocket + +Not tRPC. T3 uses `effect/unstable/rpc` with `effect/unstable/socket/Socket`. + +- Client: `apps/web/src/rpc/wsTransport.ts`, `apps/web/src/rpc/protocol.ts` (WS endpoint `/ws`, line ~46). +- Server: `apps/server/src/ws.ts`. +- Connection lifecycle / backoff: `apps/web/src/rpc/wsConnectionState.ts`. + +The transport exposes two shapes: + +1. **Request/response** — used for all write paths (`dispatchCommand`, which carries commands such as `thread.approval.respond`) and for gap recovery (`replayEvents`). +2. **Subscriptions** — hot streams the server pushes to. Two are used in practice: `subscribeShell` and `subscribeThread(threadId)`. + +Writes are commands the client pushes; reads are event streams the server pushes. The client never polls. + +## Server runtime + +Chat is agent-agnostic. The server does not use Mastra or the Vercel AI SDK. Instead, each supported agent has an adapter implementing `ProviderAdapter`: + +- `apps/server/src/provider/Services/ProviderAdapter.ts` (interface) +- `apps/server/src/provider/Services/ClaudeAdapter.ts` +- `apps/server/src/provider/Services/CodexAdapter.ts` +- `apps/server/src/provider/Services/CursorAdapter.ts` +- `apps/server/src/provider/Services/OpenCodeAdapter.ts` + +`ProviderService` (`apps/server/src/provider/Services/ProviderService.ts`) is a cross-provider facade. Per thread it owns a `ProviderSession` whose status is one of `connecting | ready | running | error | closed` (see `ProviderSessionStatus` in `packages/contracts/src/provider.ts` ~26-32). 
A command like `thread.turn.start` calls `sendTurn()` on the chosen provider, which runs the agent and produces a stream of `ProviderRuntimeEvent`s. Those are ingested and turned into orchestration events (see next section). + +## The event log + +The central abstraction. Every state change — user message, assistant message, approval request, approval response, tool call, plan upsert, session state transition, revert — is a single `OrchestrationEvent`: + +```ts +// EventBaseFields — shared by every event in the union +{ + sequence: NonNegativeInt // monotonic, global + eventId: EventId + aggregateKind: "project" | "thread" + aggregateId: string + occurredAt: IsoDateTime + commandId: CommandId // the command that produced this event + causationEventId: EventId | null // event that caused this one (if chained) + correlationId: CorrelationId // groups a whole causal chain + metadata: { providerTurnId?, adapterKey?, ingestedAt?, requestId?, providerItemId?, ... } +} +// ...plus a type-specific discriminator and payload fields per event variant. +``` + +Schema: `packages/contracts/src/orchestration.ts` — `EventBaseFields` at ~945-955, event union at ~957+. Events are tagged structs (Effect `Schema.TaggedStruct` per variant), not a single `{ type, payload }` shape. There is no explicit `actor` field — the originating actor is inferred from `commandId` / `metadata`. + +Key event types: + +- `thread.message-sent` — user or assistant message added (assistant emits with `streaming: true` first, then `false` on completion). +- `thread.turn-start-requested` — turn initiated. +- `thread.turn-diff-completed` — streaming content delivered as a unified diff (see §streaming). +- `thread.activity-appended` — tool calls, errors, setup-script activity, etc. +- `thread.approval-response-requested` / `thread.approval.respond` — tool-call approvals. +- `thread.user-input-response-requested` / `thread.user-input.respond` — structured user input. 
+- `thread.proposed-plan-upserted` — plan generation. +- `thread.session-set` — FSM transitions. +- `thread.reverted` — revert to a checkpoint. +- Project events: `project.created`, `project.meta-updated`, `project.deleted`. + +Events are **immutable** and append-only. Sequence numbers are globally monotonic, not per-session. This makes cross-thread ordering straightforward and replay trivial. + +## Persistence + +SQLite. One event table plus a handful of projection tables that are derived from it for fast reads: + +- Event store interface: `apps/server/src/persistence/Services/OrchestrationEventStore.ts`. +- Projections: + - `projection_thread_messages` + - `projection_thread_activities` + - `projection_pending_approvals` + - `projection_thread_sessions` + - `projection_thread_turns` + +On startup (`serverRuntimeStartup.ts`) the server replays the log to rebuild the read models in memory. The shell stream is served from these projections so a freshly-connected client gets the computed sidebar state in one shot. + +## Client state: Zustand + two streams + +Store: `apps/web/src/store.ts` (~2k lines; state shape at ~40-90). + +```ts +interface EnvironmentState { + // Sidebar / session-level — written by shell stream + threadShellById: Record + sidebarThreadSummaryById: Record + + // Per-thread content — written only by detail stream + messageIdsByThreadId: Record + messageByThreadId: Record> + activityIdsByThreadId: Record + proposedPlanIdsByThreadId: Record + turnDiffSummaryByThreadId: Record> + + bootstrapComplete: boolean +} +``` + +The client runs two independent subscriptions: + +- **Shell stream** (`subscribeShell`) — one per connection. Broadcasts session state, sidebar summaries, and pending flags for every thread in the environment. Cheap and always-on. +- **Detail stream** (`subscribeThread(threadId)`) — one per currently-open thread. Delivers the full per-thread payload (messages, activities, turn diffs). 
+ +Convention (documented in the store-file architecture comment): only the detail stream writes to per-thread content fields, only the shell stream writes to sidebar summary fields. Both may write to `threadShellById` / `threadSessionById`, but writes go through `writeThreadState()` which does structural equality to avoid redundant re-renders. + +This split is the thing that kills the race-condition class our current design has. The two streams don't fight for the same state, and cross-arriving events from the wrong stream are ignored by convention. + +Reducers: + +- `applyEnvironmentOrchestrationEvent(state, event, environmentId)` — per-event reducer for the shell stream. +- `syncServerThreadDetail(state, thread, environmentId)` — full-snapshot reducer for the detail stream; used on initial subscribe and on sequence-gap recovery. + +## Message send flow, end-to-end + +```mermaid +sequenceDiagram + autonumber + participant User + participant Store as Client store + participant WS as WebSocket RPC + participant Engine as OrchestrationEngine + participant Log as Event log + projections + participant Prov as Provider adapter + + User->>Store: submit message + Store->>Store: optimistic user message + Store->>WS: dispatchCommand(thread.turn.start, commandId) + WS->>Engine: command + Engine->>Engine: dedup by commandId + Engine->>Log: append thread.turn-start-requested + Log-->>WS: fan-out event + WS-->>Store: shell + detail apply event + Engine->>Prov: sendTurn() + loop streaming + Prov->>Engine: ProviderRuntimeEvent + Engine->>Log: append turn-diff-completed / activity-appended + Log-->>WS: fan-out + WS-->>Store: apply to turnDiffs / activities + end + Prov->>Engine: turn complete + Engine->>Log: append thread.message-sent (streaming:false) + Log-->>WS: fan-out + WS-->>Store: finalize assistant message + Store-->>User: render +``` + +Primarily in `apps/web/src/components/ChatView.tsx` (around 2610+). + +1. 
**Compose.** Client generates `messageId` and `commandId` (`newCommandId()`), inserts an optimistic user message into the store immediately. +2. **Dispatch.** RPC call `api.orchestration.dispatchCommand({ type: "thread.turn.start", threadId, message: { messageId, text, attachments }, modelSelection, titleSeed, runtimeMode, interactionMode, bootstrap?, createdAt })`. Note `messageId` / `text` / `attachments` are nested under `message`, not top-level on the command. +3. **Server accepts.** `apps/server/src/ws.ts` (~548) validates, deduplicates by `commandId` (see `OrchestrationCommandReceipts`), handles bootstrap (thread creation, worktree setup) if present, emits `thread.turn-start-requested`, routes to the provider adapter. +4. **Provider runs.** Adapter emits `ProviderRuntimeEvent`s. The ingestion reactor translates them into orchestration events: `thread.message-sent` (streaming), `thread.turn-diff-completed` (diffs), `thread.activity-appended` (tool calls), etc. +5. **Publish.** Events are appended to the event store, projections updated, pushed to all subscribers. +6. **Client applies.** Detail subscription gets the stream first (it's already open for the focused thread). Shell subscription updates the sidebar summary shortly after. +7. **Complete.** A final `thread.message-sent` with `streaming: false` is the authoritative terminal state for the assistant turn. + +If `dispatchCommand` fails the optimistic message is rolled back and the composer is restored. + +## Streaming assistant output + +`thread.turn-diff-completed` carries a **unified diff** against the in-progress turn, not token deltas or full snapshots. Schema: `ThreadTurnDiff` in `packages/contracts/src/orchestration.ts` (~1100). + +```ts +ThreadTurnDiff = TurnCountRange.mapFields(Struct.assign({ + threadId: ThreadId, + diff: Schema.String, // unified diff +})) +``` + +The client accumulates diffs into `turnDiffSummaryByThreadId`. 
When the authoritative `thread.message-sent` with `streaming: false` lands, that becomes the source of truth and the in-progress diff buffer is reconciled. + +The diff format keeps the wire size bounded even for long responses, which matters because the event log persists every event. + +## Tool calls, approvals, interrupts + +All in the same event stream. No side channels. + +Agent-initiated request → `thread.approval-response-requested` or `thread.user-input-response-requested` event. Client derives pending-approval UI via `derivePendingApprovals` in `apps/web/src/session-logic.ts`. + +User responds: + +```ts +dispatchCommand({ + type: "thread.approval.respond", + threadId, + requestId, + decision: "accept" | "decline" | "acceptForSession" | "cancel", +}) +``` + +`ProviderService.respondToRequest()` routes the answer back to the adapter, which unblocks the agent or fails the turn. + +Cancellation is `thread.turn.interrupt` → the provider session stops and emits `thread.session-stop-requested`. + +## Reconnect and replay + +Handled by `createOrchestrationRecoveryCoordinator` (a factory, not a class) in `apps/web/src/orchestrationRecovery.ts` (~88+). It returns a coordinator object that owns the recovery state. + +Client tracks two cursors: + +- `latestSequence` — highest sequence successfully applied. +- `highestObservedSequence` — highest sequence seen (may be ahead if events arrive out of order across the two streams). + +Every incoming event is classified as `ignore | defer | recover | apply`. If a gap is detected, the coordinator calls the RPC `replayEvents(fromSequence, toSequence)` to fetch the missing slice, applies it, and drains the deferred queue. + +Commands are idempotent by `CommandId` via the server-side `OrchestrationCommandReceipts` table, so retries on reconnect don't duplicate effects. + +This is the pattern that lets t3 be rude with the network and still be correct. 
+ +## Things worth stealing + +- **Single monotonic sequence per environment.** Makes gap detection a subtraction. +- **Command IDs with server-side dedup.** Retries are free. +- **Dual stream (shell + detail) with a written-by-who convention.** Removes the race between session-level state and per-thread content that `getDisplayState()` + `listMessages()` causes today. This is the single highest-value idea. +- **Diff-based streaming in the event log.** Bounded wire size, full auditability. +- **Projections as a pattern, not just a perf trick.** Keeps the client's initial render cheap without coupling clients to the log shape. +- **One event type for approvals / questions / tool calls.** Not a side channel. +- **Explicit provider status (`connecting | ready | running | error | closed`).** Makes "is the agent running" a boolean derived from one field, not an inference across two polls. +- **Causation + correlation IDs on every event.** `causationEventId` chains events back to the event that spawned them; `correlationId` groups a whole causal chain (e.g. a turn). Useful for debugging and for ordering beyond bare sequence numbers. + +## Things to approach carefully + +- **Effect RPC.** Nice ergonomics for t3, but we're already tRPC-shaped. Porting the *patterns* (subscriptions, sequenced events, replay RPC) to tRPC subscriptions over WS gets us 90% of the value without switching RPC systems. +- **Event-sourced everything.** t3 pays a persistence cost on every state change. For us, only the *transport* race needs fixing; whether the chat store becomes fully event-sourced on disk is a separate question from whether the wire protocol is event-driven. +- **Global sequence vs per-session sequence.** Global is cleaner for multi-thread clients (sidebars), but per-session is simpler to implement on top of the existing harness subscription. Pick one and commit. +- **Unified-diff streaming format.** Clever, but requires a diff library on client and server and adds complexity vs. 
"emit a `message_updated` event with latest full content." Worth it only if we care about wire size for very long turns. diff --git a/plans/v1-to-v2-fast-migration.md b/plans/v1-to-v2-fast-migration.md new file mode 100644 index 00000000000..e6bbfaeb644 --- /dev/null +++ b/plans/v1-to-v2-fast-migration.md @@ -0,0 +1,384 @@ +# V1 -> V2 Fast Migration Plan + +A pragmatic plan to ship the v1 chat UX on top of the existing v2 host-service chat architecture. This is **not** the full event-log rearchitect from `v2-chat-greenfield-architecture.md`, and it should **not replace** the host-service chat work that already exists. The current host-service implementation is the foundation; this plan updates the remaining migration work around it. + +## TL;DR + +- **Keep** the existing host-service chat implementation: + - `packages/host-service/src/runtime/chat/chat.ts` + - `packages/host-service/src/trpc/router/chat/chat.ts` + - `packages/host-service/src/providers/model-providers/` + - `packages/workspace-client` +- **Keep** v1 client UX code where possible: `ChatPane`, `ChatPaneInterface`, `useChatPaneController`, `useChatDisplay`, composer, approval/question dialogs, model picker, MCP UI. +- **Do not move canonical session metadata into host-service.** Cloud remains the owner of `chat_sessions`; host-service owns local runtime execution. +- **Collapse** the dual-poll race with `getSnapshot()` on the existing host-service chat router, then wire clients to consume that snapshot. +- **Add a compatibility/adaptation layer** between v1's `chatRuntimeServiceTrpc.session.*` shape and host-service's `workspaceTrpc.chat.*` shape instead of rewriting host-service around v1. +- **Ship behind a per-workspace flag** so the old Electron IPC chat runtime remains a rollback path during bake. + +Scope: roughly 1-2 weeks of implementation plus bake time, assuming the existing host-service chat runtime stays in place and the migration focuses on parity, adapter wiring, and rollout. 
+ +### Fixes at a glance + +Quick scan of every concrete fix in this plan, ordered by priority. Each links to its phase below. + +| # | Severity | Fix | Phase | +|---|---|---|---| +| 1 | HIGH | Runtime disposal on session delete (no leak) | P0 Fix #1 | +| 2 | HIGH | Cross-workspace `sessionId` race in runtime creation | P0 Fix #2 | +| 3 | MEDIUM | Collapse `getDisplayState` + `listMessages` into single `getSnapshot` | P1 | +| 4 | MEDIUM | Drop `fps: 60` polling override at `ChatPaneInterface.tsx:287` | P1 | +| 5 | MEDIUM | Update cloud `lastActiveAt` after host send (selector ordering) | P1 | +| 6 | MEDIUM | Implement slash command resolution (currently stubs) | P4 | +| 7 | MEDIUM | Add `searchFiles` for `@file` mention autocomplete (missing entirely) | P4 | +| 8 | MEDIUM | Wire `SessionStart` / `SessionEnd` / `UserPromptSubmit` hooks (Stop / Notification hooks deferred) | P4 | +| 9 | MEDIUM | Wire title generation via cloud `chat.updateTitle` | P4 | +| 10 | MEDIUM | Decide Superset MCP tools strategy (defer or port) | P4 | +| 11 | MEDIUM | Decide MCP overview / auth strategy (defer or port) | P4 | +| 12 | LOW | Real model-provider auth state (no hardcoded `isAnthropicAuthenticated = true`) | P4 | +| 13 | LOW | Optional: validate `(sessionId, workspaceId)` against cloud at runtime create | P0 (decision) | +| 14 | LOW | Mastra memory store guard in `restartFromMessage` | Lower-Risk Notes | +| 15 | LOW | Comment on `process.env` mutation in `applyRuntimeEnv` | Lower-Risk Notes | +| 16 | LOW | Confirm `protectedProcedure` end-to-end | Lower-Risk Notes | + +P0 (HIGH) lands first as the prerequisite. P1 and P4 are independent of each other and can run in parallel after P0. P2-P3 (adapter, bootstrap migration) and P5-P6 (rollout, deletion) wrap around them. + +## Current Host-Service Chat State + +The host-service chat path already exists and should be preserved. 
+ +| Area | Current implementation | Notes | +|---|---|---| +| Runtime owner | `packages/host-service/src/runtime/chat/chat.ts` | `ChatRuntimeManager` owns in-memory `RuntimeSession` instances keyed by `sessionId`. It resolves `workspaceId -> worktreePath`, creates Mastracode runtimes, and exposes chat runtime methods. | +| Router | `packages/host-service/src/trpc/router/chat/chat.ts` | Mounted as `chat` in `packages/host-service/src/trpc/router/router.ts`. Uses host-service auth and calls `ctx.runtime.chat.*`. | +| App wiring | `packages/host-service/src/app.ts` | Creates `ChatRuntimeManager` and mounts it as `runtime.chat`. | +| Model provider bridge | `packages/host-service/src/providers/model-providers/` | `LocalModelProvider` and `CloudModelProvider` implement `ModelProviderRuntimeResolver` for runtime env preparation. | +| Renderer client | `packages/workspace-client` | `workspaceTrpc` talks to host-service over local HTTP. The v2 workspace route already uses this path. | +| Existing v2 consumer | `apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/.../ChatPane` | Calls `workspaceTrpc.chat.*` directly with `{ sessionId, workspaceId }`. | +| Cloud session metadata | `packages/trpc/src/router/chat/chat.ts` | Cloud tRPC already has `createSession`, `deleteSession`, `updateTitle`, `uploadAttachment`, and `getModels`. | +| Legacy v1 runtime path | `packages/chat/src/server/trpc/service.ts` + Electron IPC router | Still powers the old `ChatPane` path today. | + +The migration should converge the old v1 pane onto this host-service path without discarding the host-service runtime/router. + +## Goals + +1. **Host-service remains the single owner of local chat runtime execution.** +2. **Preserve v1 UX parity** while swapping the runtime transport under it. +3. **Close the dual-poll race** with a single snapshot query on host-service and the legacy IPC path during migration. +4. 
**Keep cloud as the canonical session metadata owner** for `chat_sessions`, titles, attachments, and models. +5. **Keep rollback simple** by routing per workspace through either the existing Electron IPC runtime or host-service. +6. **Avoid blocking greenfield work.** This plan should make the host-service boundary stable so event-log work can build on top of it. + +## Non-Goals + +- Not replacing or rewriting the existing host-service chat runtime. +- Not moving canonical `chat_sessions` ownership from cloud Postgres into host-service. +- Not introducing the event log, sequence numbers, gap detection, or a durable local chat store. +- Not solving multi-device ownership or session-host affinity end-to-end. +- Not removing the v2 workspace chat path. That path is the proof point for host-service chat and should continue to improve. +- Not making provider credential handling elegant in this migration. It only needs to preserve current working behavior and leave the cleaner abstraction for follow-up. + +## Implementation Audit + +Detailed walk of the existing host-service chat code, what's load-bearing, what's stubbed, and what's actually broken. The migration phases below reference this section. + +### What's solid (don't rewrite) + +1. **Mastra harness lifecycle** in `ChatRuntimeManager` — `init()` → `setResourceId()` → `selectOrCreateThread()` → event subscription. The structure is right. +2. **Concurrent-creation guard** via `runtimeCreations` map (lines 316, 442-450) prevents two requests for the same session from both spinning up runtimes — but see [Bug #2](#verified-bugs) below for a real defect in the keying. +3. **Error normalization** (lines 189-228) strips `AI_APICallError` prefix and extracts nested error messages. UX-load-bearing, easy to break, leave alone. +4. **Workspace DB resolution** at create time (line 392) — `workspaceId` → `worktreePath` lookup is cleaner than v1's `cwd` passthrough. +5. 
**Restart-from-message** (lines 247-310) uses Mastra's memory store correctly to clone the thread and re-send from a target message. +6. **AGENTS.md injection** (lines 359-381) only writes if missing or previously written by Superset — safe re-entrance. +7. **Model provider abstraction** (`CloudModelProvider` / `LocalModelProvider`) gates runtime creation on `hasUsableRuntimeEnv()` and tracks env keys for cleanup. Right shape. + +### Stubbed in `ChatRuntimeManager` (lines 594-635) + +```ts +getSlashCommands() → [] // TODO +resolveSlashCommand() → { handled: false } // TODO +previewSlashCommand() → { handled: false } // TODO +getMcpOverview() → { sourcePath: null, servers: [] } // TODO +``` + +The router exposes these procedures and the v2 ChatPane renders the surfaces, so users see slash menus and MCP affordances that don't actually work. + +### Missing from the router entirely + +- **`searchFiles`** — v1 had `workspace.searchFiles` (delegated to `@superset/workspace-fs/host`). Without it, `@file` mention autocomplete is dead. +- **`authenticateMcpServer`** — v1 had OAuth callback for new MCP servers. With MCP currently stubbed anyway, this is downstream of `getMcpOverview`. + +### Behaviors v1 runs that host-service runtime doesn't + +| Behavior | v1 location | Host-service status | +|---|---|---| +| `runSessionStartHook()` after init | `packages/chat/src/server/trpc/utils/runtime/runtime.ts:130` | Not called. Host only sets hook session id at `chat.ts:408`. | +| `runSessionEnd()` on teardown | v1 hook manager | Not called. Also no teardown path exists. | +| `onUserPromptSubmit()` before send | v1 hook manager | Not called. | +| `getSupersetMcpTools()` loaded | v1 `service.ts:113-116` | Not loaded. | +| `generateAndSetTitle()` after first / 10th send | v1 `runtime.ts:457`, `service.ts:281` | Not called. | +| `subscribeToSessionEvents` with `onLifecycleEvent` callback | v1 | Only error / sandbox events surfaced; lifecycle callback not exposed. 
| +| `mcpManualStatuses` per-runtime tracking | v1 | Not present. | + +### Contract differences (the adapter layer) + +| Concern | v1 | Host-service | +|---|---|---| +| Session id input | `{ sessionId, cwd }` | `{ sessionId, workspaceId }` | +| Namespace | `session.*` + `workspace.*` (split) | `chat.*` (flat) | +| Approval reply | `session.approval.respond` | `chat.respondToApproval` | +| Question reply | `session.question.respond` | `chat.respondToQuestion` | +| Plan reply | `session.plan.respond` | `chat.respondToPlan` | +| File search | `workspace.searchFiles` | _missing_ | +| MCP auth | `workspace.authenticateMcpServer` | _missing_ | + +The adapter layer is small but real: ~12 procedure renames, 2 missing procedures, payload-shape passthrough, and `cwd` → `workspaceId` resolution. + +### Verified bugs + +These are real defects in the current code, verified by reading the source. Listed in priority order. + +1. **Runtime leak on session delete (HIGH).** `useWorkspaceChatController.ts:105` calls cloud `deleteSession` after a confirmation, but the host-service runtime in `ChatRuntimeManager.runtimes` (chat.ts:315) has no dispose path. The router has no `endSession` / `disposeRuntime` procedure (chat.ts:31). Each abandoned session leaks a `RuntimeSession` for the lifetime of the host-service process. **Fix:** add `chat.endSession({ sessionId, workspaceId })` mutation that calls a new `ChatRuntimeManager.disposeRuntime(sessionId)`, run any session-end hooks, then drop from the map. Wire the call after cloud `deleteSession` succeeds. Also wire it on workspace deletion. + +2. **Cross-workspace sessionId race in runtime creation (HIGH).** `runtimeCreations` (chat.ts:316) is keyed by `sessionId` only. The check that an existing runtime's `workspaceId` matches the request (line 436) runs only on the *already-created* path, not on the *in-flight* path. 
So if creation for `(sessionId=X, workspaceId=A)` is mid-flight and a second request arrives for `(sessionId=X, workspaceId=B)`, the second request awaits the in-flight promise and receives a runtime bound to `workspaceA`. **Fix:** key the map by `${sessionId}:${workspaceId}`, or store the workspaceId on the in-flight promise and reject mismatches at line 442. Easy to fix, real bug under any concurrent-mount scenario (e.g., session opened in two windows). + +3. **v2 ChatPane polls at 60 fps (MEDIUM).** `WorkspaceChatInterface/ChatPaneInterface.tsx:287` passes `fps: 60` to `useWorkspaceChatDisplay`, which clamps the refetch interval to ~16 ms (`useWorkspaceChatDisplay.ts:14-16`). Combined with the still-separate `getDisplayState` + `listMessages` queries, that's ~120 RPCs per second per active chat pane. **Fix:** the `getSnapshot` collapse from §The Race Fix kills both birds — single query, sane cadence (4 fps matches v1 default). + +4. **Cloud `lastActiveAt` not updated on host send (MEDIUM).** `useWorkspaceChatController.ts:81` sorts the session selector by `lastActiveAt`. Host `sendMessage` (chat.ts:509) goes straight to the harness and never pings cloud. The cloud `chat_sessions.lastActiveAt` (`packages/trpc/src/router/chat/chat.ts:80`) only updates on metadata mutations. **Fix:** after a successful host send, host-service calls cloud `chat.updateSession({ lastActiveAt: now })` via its API client. Or: the v2 client fires a fire-and-forget cloud update alongside the host send. Either works; second is simpler. + +5. **Sessionid ↔ workspaceId not validated against cloud (MEDIUM).** Host-service trusts authenticated local callers to pair any `sessionId` with any local `workspaceId`. It validates only that the local workspace row exists (chat.ts:391). The PSK limits exposure, but a stronger binding would validate against cloud `chat_sessions.v2WorkspaceId` either at session-create time or on first runtime creation. 
**Fix:** either (a) one-time validation at runtime creation that checks cloud `chat_sessions.v2WorkspaceId === workspaceId`, or (b) require cloud to issue a short-lived binding token that host accepts. (a) is enough for this migration. + +6. **No host-service chat-specific tests.** Mastra harness behavior is exercised through other layers but the runtime manager has no targeted coverage for workspace binding, runtime reuse, snapshot consistency, or the bugs above. **Fix:** add tests as part of the corresponding fixes. + +## The Race Fix + +The highest-value behavior fix is still collapsing: + +```ts +getDisplayState() +listMessages() +``` + +into: + +```ts +chat.getSnapshot(input) -> { + displayState: ChatDisplayState + messages: Message[] + observedAt: number +} +``` + +On host-service, implement this on top of the existing `ChatRuntimeManager`. It should read `displayState` and `messages` inside one router procedure and return one response. Because `listMessages()` is async, this is best described as a **single server-side observation**, not a fully locked atomic snapshot. It still removes the client-side two-query race that causes mismatched message/display state. + +During migration, add the same procedure to the legacy Electron IPC runtime router so old-path users get the same client behavior. 
+ +## Ownership Model + +| Concern | Owner during this migration | +|---|---| +| Local runtime execution | Host-service `ChatRuntimeManager` | +| Workspace cwd resolution | Host-service via `workspaceId` | +| Runtime credentials/env prep | Host-service model-provider resolver | +| Canonical chat session rows | Cloud tRPC / API | +| Session titles | Cloud tRPC `chat.updateTitle`, triggered by runtime owner when parity is restored | +| Attachments | Cloud tRPC `chat.uploadAttachment` | +| Old durable stream compatibility | Existing API routes until explicitly retired | +| Renderer UX | Existing v1 chat UI, adapted to host-service transport | + +## Phased Migration + +Each phase should be a separate PR or small PR stack. + +### P0 - Critical Bug Fixes (Verified Bugs #1, #2) + +**Goal:** close the two HIGH-severity defects in the existing host-service chat runtime before any rollout work. Both are surgical changes (≤50 lines each) and unblock everything else. + +#### Fix #1 — Runtime disposal on session delete + +References Verified Bug #1 in §Implementation Audit. + +- [ ] Add `disposeRuntime(sessionId): Promise` to `ChatRuntimeManager` (`packages/host-service/src/runtime/chat/chat.ts`): + - [ ] Look up `RuntimeSession` by `sessionId`. + - [ ] If present, run any session-end hook (placeholder ok if hook wiring lands later). + - [ ] Call `harness.abort()` and any `harness.destroy()` / cleanup the harness exposes. + - [ ] Delete from `runtimes` map. + - [ ] Idempotent — disposing an unknown session id is a no-op. +- [ ] Add `chat.endSession({ sessionId, workspaceId })` mutation to `packages/host-service/src/trpc/router/chat/chat.ts`. +- [ ] Wire client call sites: + - [ ] `useWorkspaceChatController.ts:105` (after cloud `deleteSession` succeeds). + - [ ] Workspace deletion flow — when a workspace is deleted, dispose all runtimes for sessions bound to it. 
+- [ ] Test: dispose then re-send to the same `sessionId` creates a fresh runtime; the map size returns to baseline after dispose. + +#### Fix #2 — Cross-workspace sessionId race in runtime creation + +References Verified Bug #2 in §Implementation Audit. + +- [ ] In `ChatRuntimeManager` (`chat.ts:316`), change `runtimeCreations` keying from `string` (sessionId) to `${sessionId}:${workspaceId}` — OR — keep the sessionId key and store `{ workspaceId, promise }` so awaiting code can validate the workspace match before returning. +- [ ] Apply the same workspace-mismatch guard that exists for already-created runtimes (`chat.ts:436`) to the in-flight path (`chat.ts:442`). A request whose `workspaceId` does not match the in-flight creation's workspace must throw, not silently get the wrong runtime. +- [ ] Test: concurrent calls with `(sessionId=X, workspaceId=A)` and `(sessionId=X, workspaceId=B)` resolve to two distinct runtimes (or one rejects with a clear "session bound to other workspace" error). Today's behavior silently shares the in-flight promise. + +#### General hardening + +- [ ] Add lightweight tests for the existing happy paths so regressions don't sneak in alongside the bug fixes: + - [ ] workspace-bound runtime creation + - [ ] same `sessionId` reused in same workspace returns the same runtime + - [ ] router procedures call the runtime manager with `{ sessionId, workspaceId }` + +**Acceptance:** runtime leaks are gone, cross-workspace race cannot happen, basic test coverage exists for the manager. + +### P1 - Add `getSnapshot` And Fix Polling Cadence (Verified Bugs #3, #4) + +**Goal:** one query per poll cycle, sane cadence, and host-side cloud `lastActiveAt` updates so the session selector keeps reordering correctly. + +#### Snapshot procedure + +- [ ] Add `workspaceTrpc.chat.getSnapshot({ sessionId, workspaceId })` to host-service. Returns `{ displayState, messages, observedAt }` from a single handler invocation. 
+- [ ] Add legacy `chatRuntimeServiceTrpc.session.getSnapshot` to the Electron IPC runtime path with the same shape. +- [ ] Implementation note: read `displayState` and `listMessages()` inside one router function; one server-side observation, not a fully locked atomic snapshot. Document this in code. + +#### Client cutover + +- [ ] Update host-service-backed v2 chat display (`useWorkspaceChatDisplay`) to consume `getSnapshot`. +- [ ] Update shared/v1 `useChatDisplay` to consume legacy `getSnapshot`. +- [ ] Update optimistic-message cache writes to target the snapshot cache, or invalidate/refetch the snapshot after cross-session sends. +- [ ] Keep `getDisplayState` and `listMessages` alive on both surfaces until every caller is migrated; delete in P6. + +#### Polling cadence (Verified Bug #3) + +- [ ] Drop the `fps: 60` parameter at `ChatPaneInterface.tsx:287`. Default in `useWorkspaceChatDisplay` is `fps: 4`, which matches v1 and is the right cadence for a polled chat. 60 fps means ~120 RPCs/sec per active pane today. +- [ ] Confirm there are no other call sites passing high `fps`. Grep `useWorkspaceChatDisplay` callers; flag any non-default `fps` for review. + +#### Cloud `lastActiveAt` update on host send (Verified Bug #4) + +- [ ] After a successful host `sendMessage`, update cloud `chat_sessions.lastActiveAt` so the session selector (`useWorkspaceChatController.ts:81`) keeps reordering after activity. Two viable shapes: + - [ ] **Host-side**: host-service's API client calls cloud `chat.updateSession({ sessionId, lastActiveAt: now })` after a successful send. Single source of truth, no extra client code. + - [ ] **Client-side**: v2 client fires a fire-and-forget `apiTrpcClient.chat.updateSession` alongside the host send. +- [ ] Pick one (recommend host-side) and implement. Verify selector reorders after a send. 
+ +**Acceptance:** client chat display uses one polling query on both old and host-service paths, default polling is 4 fps, sending a message reorders its session to the top of the selector. + +### P2 - Add V1 Compatibility Adapter For Host-Service + +**Goal:** allow the old `ChatPane` UX to talk to host-service without reshaping host-service around the v1 router. + +- [ ] Add a client-side adapter or provider resolver that exposes the v1 command surface while internally calling `workspaceTrpc.chat.*`. +- [ ] Map v1 `{ sessionId, cwd }` inputs to host-service `{ sessionId, workspaceId }` inputs at the renderer boundary. +- [ ] Keep v1 UI components unchanged where possible. +- [ ] Ensure `sendMessage`, `restartFromMessage`, `stop`, approvals, questions, plans, and snapshot reads all route through the adapter. +- [ ] Add a per-workspace flag to choose Electron IPC runtime vs host-service runtime. +- [ ] Add a dev-only backend indicator for QA. + +**Acceptance:** flipping the flag for a workspace switches v1 chat runtime traffic to host-service with the same visible UI. + +### P3 - Move V1 Session Bootstrap Off REST, But Keep It Cloud-Owned + +**Goal:** stop `useChatPaneController` from calling `/api/chat/[sessionId]` directly while keeping canonical metadata in cloud. + +- [ ] Replace v1 `fetch('/api/chat/:sessionId')` session create/delete calls with cloud tRPC `apiTrpcClient.chat.createSession` / `deleteSession`. +- [ ] If v1 workspaces still need the legacy `workspaceId` column instead of `v2WorkspaceId`, extend cloud tRPC carefully rather than moving this concern to host-service. +- [ ] Keep REST routes alive for one release as compatibility/fallback because they also manage durable-stream behavior. +- [ ] Preserve `createSessionInitRunner` retry/toast/reporting behavior. +- [ ] Verify session listing still flows through Electric `chatSessions` collections. 
+ +**Acceptance:** fresh v1 clients no longer call the REST session bootstrap routes, but cloud remains the session metadata owner. + +### P4 - Fill Host-Service Parity Gaps + +**Goal:** make the host-service path match v1 behavior closely enough for canary. References the gap list in §Implementation Audit. + +#### Slash commands (currently stubs at `chat.ts:594-635`) + +- [ ] Port slash-command discovery/resolution from `packages/chat/src/server/desktop/slash-commands/` to host-service. +- [ ] Implement `getSlashCommands` so it returns project + global commands (instead of `[]`). +- [ ] Implement `resolveSlashCommand` and `previewSlashCommand` so prompts substitute correctly (instead of `{ handled: false }`). +- [ ] Verify project-scoped (`.claude/commands`, `.agents/commands`) and global (`~/.claude/commands`) sources both resolve. + +#### File mention search (missing from router entirely) + +- [ ] Add `chat.searchFiles({ workspaceId, query, ... })` procedure to host-service. +- [ ] Wire to `@superset/workspace-fs/host` (already used elsewhere). Match v1's `workspace.searchFiles` shape so the renderer adapter is trivial. +- [ ] Verify `@file` mention autocomplete works in the host-service-backed chat pane. + +#### Session lifecycle + user-prompt hooks (currently uncalled) + +Scope: `SessionStart`, `SessionEnd`, `UserPromptSubmit` only. **`Stop` and `Notification` hook events are intentionally deferred** — they aren't blocking for canary, they overlap with agent-status UI plumbing we're not chasing in this migration. + +- [ ] In `ChatRuntimeManager.createRuntime` (after `setResourceId`, around line 408): call `runSessionStartHook()` analogous to v1 `runtime.ts:130`. +- [ ] In `ChatRuntimeManager.disposeRuntime` (added in P0 Fix #1): call `runSessionEnd()` before tearing down. +- [ ] In `ChatRuntimeManager.sendMessage` (line 509): call `onUserPromptSubmit()` before delegating to harness; respect a "blocked" return. 
+- [ ] Reload hook config on session re-access (matches v1 `reloadHookConfig`). +- [ ] Verify a user-defined `.claude/*.hooks.ts` `SessionStart` / `UserPromptSubmit` / `SessionEnd` hook actually fires. + +#### Title generation (currently not called) + +- [ ] Wire `generateAndSetTitle()` after the first user message and every 10th message — analogous to v1 `runtime.ts:457` and `service.ts:281`. +- [ ] Persist via cloud tRPC `chat.updateTitle({ sessionId, title })` so titles survive across devices. + +#### Superset MCP tools (currently not loaded) + +- [ ] Decide product policy: do host-service-backed chat sessions get Superset's built-in MCP tools (analytics queries etc.), or only user-configured MCP? +- [ ] If yes: load `getSupersetMcpTools()` analogous to v1 `service.ts:113-116` during runtime creation. +- [ ] If no: explicitly note in code so the gap isn't accidentally re-opened. + +#### MCP overview / authentication (currently stubbed) + +- [ ] Decide the MCP strategy for canary: + - [ ] **Defer**: keep `getMcpOverview` returning empty and hide/limit the MCP UI surfaces in v2-workspace ChatPane so users don't see broken affordances. v1 already shipped with `ENABLE_MASTRA_MCP_SERVERS = false`, so this is a credible default. + - [ ] **Port**: implement `getRuntimeMcpOverview()` and `authenticateRuntimeMcpServer()` on host-service equivalents. +- [ ] If deferring: track Mastra MCP enable as separate follow-up. + +#### Lifecycle event forwarding — deferred + +`subscribeToSessionEvents` `onLifecycleEvent` callbacks (agent start/stop, permission request notifications, etc.) are out of scope. Polling `getSnapshot` already covers what the UI needs for canary; push-style lifecycle notifications belong with the event-log work in `v2-chat-greenfield-architecture.md`. + +#### Model-provider auth / status UI + +- [ ] Verify the model picker doesn't claim a provider is authenticated when host-service can't actually run it. 
Today some places hardcode `isAnthropicAuthenticated = true`. Plumb the real auth state through `LocalModelProvider.hasUsableRuntimeEnv()`. + +**Acceptance:** known v1 behaviors either work on host-service or have an explicit product decision to defer (with the deferred ones surfacing no broken UI). + +### P5 - Canary And Rollout + +**Goal:** ship host-service-backed chat safely. + +- [ ] Dogfood host-service chat for developer workspaces. +- [ ] Canary a small percentage of real workspaces. +- [ ] Monitor chat error rate, runtime creation failures, provider credential failures, and Sentry. +- [ ] Keep rollback as a flag flip back to Electron IPC. +- [ ] Bake for at least one release before deleting legacy paths. + +**Acceptance:** host-service chat handles the majority of canary traffic without elevated errors or parity regressions. + +### P6 - Delete Legacy Runtime Paths + +**Goal:** one runtime owner. + +- [ ] Delete the Electron-main `chatRuntimeService` runtime router after the host-service path is default-on and stable. +- [ ] Delete legacy dual-query procedures after every caller uses `getSnapshot`. +- [ ] Delete the client adapter/flag once host-service is the only target. +- [ ] Revisit `/api/chat/[sessionId]` and durable-stream routes separately. Delete them only after confirming no remaining durable-stream consumers. +- [ ] Update docs to point to host-service chat as the runtime owner. + +**Acceptance:** runtime chat traffic goes only through host-service; cloud still owns session metadata unless a separate migration changes that. + +## Lower-Risk Notes + +Items that aren't outright bugs but should be verified during the migration. The Verified Bugs in §Implementation Audit are the load-bearing ones; these are the next tier. + +1. **Provider credentials parity.** `LocalModelProvider` reads keychain + mastracode auth storage + `~/.mastracode` config. 
Verify it covers every credential source the legacy desktop chat service supports (managed env config, backup slots, OAuth refresh) before flipping the flag for users with non-standard auth setups. +2. **Provider env mutation.** `applyRuntimeEnv()` mutates `process.env` globally. Concurrent runtimes for different model providers could in theory race on env keys. One provider per host-service install today, so probably fine in practice — but worth a comment in the code so a future contributor doesn't re-trip on it. +3. **Mastra memory-store assumption** in `restartFromMessage` (lines 230-245). Throws cryptically if storage isn't configured. Add a guard with a clearer error. +4. **`protectedProcedure` end-to-end check.** Confirm the chat router's `protectedProcedure` actually validates auth and that `ctx.organizationId` is populated where expected. The audit didn't trace this fully. +5. **Snapshot semantics communication.** `getSnapshot` is a single server-side observation, not an event-log atomic snapshot. Good enough for this migration; document that explicitly so anyone reading later doesn't oversell it as final consistency. + +## Relationship To Greenfield Plan + +This migration stabilizes the host-service runtime boundary that `v2-chat-greenfield-architecture.md` wants to build on. After this lands: + +- the event-log work can attach to host-service `ChatRuntimeManager`; +- `getSnapshot` becomes a temporary bridge until subscriptions/event-log reads replace polling; +- legacy Electron IPC runtime ownership can be deleted without redoing the host-service migration. + +## Summary + +Keep the existing host-service chat runtime and router. Add snapshot reads, a v1 compatibility adapter, cloud-owned session bootstrap cleanup, parity work, and a flag-based rollout. The old plan was written as if host-service chat still needed to be created; this version treats it as already present and worth preserving. 
diff --git a/plans/v2-chat-greenfield-architecture.md b/plans/v2-chat-greenfield-architecture.md new file mode 100644 index 00000000000..24d4c1dfa72 --- /dev/null +++ b/plans/v2-chat-greenfield-architecture.md @@ -0,0 +1,712 @@ +# V2 Chat — Greenfield Architecture Proposal + +Proposed transport + state architecture for v2 chat. Builds on `host-service-chat-architecture.md` and `chat-mastra-rebuild-execplan.md`, and takes specific patterns from `t3code-chat-architecture-reference.md`, `opencode-electron-chat-architecture-reference.md`, and `background-agents-chat-architecture-reference.md`. Starting point is the current v2 chat in `packages/chat` + `packages/host-service`, which polls `getDisplayState()` and `listMessages()` at 4 fps from two independent harness sources — that's the thing this proposal replaces. + +## Goals + +1. **Kill the polling race.** Single server-side source of truth per session; client reducer applies events in order. +2. **Keep the wire protocol stable across runtime locations.** Same `ChatEvent` shape whether the runtime is the user's laptop host-service or a cloud worker spun up for handoff. +3. **Support multi-device + multi-client on the same session.** A session opened on desktop, web, and mobile simultaneously must converge to identical state. +4. **Enable device handoff at turn boundaries.** Close laptop → continue on phone → return to laptop, with a cloud worker picking up in between. Mid-turn handoff is explicitly out of scope (see §P7). +5. **Host-service keeps owning the agent runtime and filesystem.** No change to the `host-service-chat-architecture.md` direction of travel — this is the *transport + state* layer that sits above it. (In the P5b DO path, host-service's authority scopes *down* slightly, see §Cloud-backed EventLog.) +6. **Reuse what we have.** tRPC everywhere, `@hono/node-ws` already in host-service, Zustand already in the dep tree. + +## Non-goals + +- Not migrating off tRPC. 
t3code's Effect-RPC is nice but the wire shape is what matters; we can get 90% of the value with tRPC subscriptions. +- Not replacing Mastracode. The harness stays — we wrap its event subscription, we don't rewrite the agent loop. +- Not event-sourcing the whole database. The *transport* is event-driven; persistence strategy is a separate decision (see §Persistence). +- Not inventing a new message shape. We already use Vercel AI SDK v6's `UIMessage` and its part types (`TextUIPart`, `ReasoningUIPart`, `ToolUIPart`, `FileUIPart`) throughout the chat UI — `ai-elements` renders them directly. We keep that as the canonical message shape. +- Not adopting `useChat` from `@ai-sdk/react`. It's built for client-initiated single-subscriber request/response. Our model is multi-subscriber, event-driven, with replay and approvals — outside its vocabulary. + +## Recommended architecture + +```mermaid +flowchart LR + subgraph Client["Client (desktop renderer · web · mobile)"] + direction TB + UI["React components"] + Store["Zustand session store
Record<id, UIMessage> · currentTurn ·
pendingApprovals · status"] + Reducer["applyEvent(state, event)"] + Recovery["gap detector
latestSeq · highestSeen"] + UI --> Store + Reducer --> Store + Recovery -.-> Store + end + + subgraph Transport["tRPC over WebSocket (@hono/node-ws)"] + direction TB + Cmd["mutations:
sendMessage · answerApproval ·
answerQuestion · interrupt"] + Replay["query: replayEvents(sessionId, fromSeq, toSeq)"] + Shell["subscription: workspace.watch"] + Detail["subscription: session.watch(sessionId)"] + end + + subgraph HostService["Host service (local) OR Cloud worker"] + direction TB + Router["chatRouter"] + Bridge["EventBridge
sequences harness events +
user commands, synthesizes
user_message_submitted"] + Dedup["CommandReceipts
(idempotent by commandId)"] + Log[("EventLog abstraction
append · readFrom · subscribe
seq-numbered · per-session")] + Shim["LocalEventLog (SQLite) today
— OR —
PostgresEventLog (P5a)
OR DO-native (P5b)"] + Harness["Mastracode harness
(unchanged)"] + Router --> Dedup + Router --> Bridge + Bridge --> Log + Log --> Shim + Bridge --> Harness + Harness --> Bridge + end + + Store -->|mutations| Cmd + Cmd --> Router + Recovery -->|on gap| Replay + Replay --> Log + Shell --> Store + Detail --> Store + Log -.->|fan-out| Shell + Log -.->|fan-out| Detail +``` + +The five load-bearing ideas, each earned from the reference docs: + +1. **Event log as the single source of truth.** Per-session, append-only, monotonically numbered. Both `getDisplayState()` and `listMessages()` become *projections of the log*, not independent queries. — from t3code. +2. **Append-style streaming deltas.** `{ messageId, partIndex, field, delta }` → client applies `messages[id].parts[i][field] += delta`. No unified diffs, no token objects. — from opencode. +3. **Dual subscription scope.** One workspace-wide stream for session summaries (sidebar), one per-session stream for message content. Each region of client state is written by exactly one stream. — from t3code. +4. **Command IDs + server dedup.** Mutations are idempotent by `commandId`; retries on reconnect don't duplicate effects. — from t3code. +5. **Gap detection + `replayEvents` RPC.** Client tracks `latestSeq` / `highestSeen`; on gap, fetch the slice. Non-negotiable for multi-device. — from t3code. + +And the four load-bearing things *we* add: + +6. **`EventLog` as an abstract interface** — swappable backend: local ring buffer + SQLite snapshot today, `s2.dev` durable stream tomorrow, without changing a line of client or router code. +7. **Writes stay as tRPC mutations.** No subscription-based commands. Everything the user does is a regular typed mutation that returns fast; its effect shows up as events on the subscription. +8. **Single session reducer per open session** (not a monolithic global store). Multiple sessions = multiple stores — keeps memory bounded and reducers small. +9. 
**`UIMessage` on the wire, our reducer on top.** Event payloads carry `UIMessage` and AI SDK part types verbatim. The reducer is ~150 lines of Zustand over a `Record`. `ai-elements` renders the result unchanged. No translation layer between wire and render. + +## Wire protocol + +### Events (server → client) + +Every event conforms to a base envelope, then a discriminated payload. Message and part shapes are AI SDK v6's `UIMessage` and its part union (`TextUIPart | ReasoningUIPart | ToolUIPart | FileUIPart | ...`) — not a custom type. + +```ts +import type { UIMessage } from "ai" +// UIMessage["parts"][number] is the canonical part union from AI SDK v6. +type UIPart = UIMessage["parts"][number] + +type ChatEvent = { + seq: number // monotonic per sessionId, gaps possible after replay + eventId: string + sessionId: SessionId + workspaceId: WorkspaceId + occurredAt: string // ISO + commandId: CommandId | null // null for harness-internal events + causationId: string | null // event that caused this, for tracing +} & EventPayload + +type EventPayload = + // Message lifecycle — uses UIMessage verbatim + | { type: "message.appended"; message: UIMessage } // full message added (user msg, assistant msg shell) + | { type: "message.part.appended"; messageId: UIMessage["id"]; partIndex: number; part: UIPart } // new part on an existing message + | { type: "message.part.delta"; messageId: UIMessage["id"]; partIndex: number; field: "text" | "reasoning"; delta: string } // append into part[field] + | { type: "message.part.updated"; messageId: UIMessage["id"]; partIndex: number; part: UIPart } // replace a part wholesale (tool state transitions, final snapshots) + | { type: "message.completed"; messageId: UIMessage["id"] } + // Turn lifecycle + | { type: "turn.started"; turnId: TurnId } + | { type: "turn.completed"; turnId: TurnId; status: "ok" | "error" | "cancelled" } + // Approvals / questions (out-of-band requests from the agent) + | { type: "approval.requested"; requestId: 
ApprovalId; tool: string; args: unknown } + | { type: "question.requested"; requestId: QuestionId; prompt: string } + // Session status — projection of turn/approval state, exposed for convenience + | { type: "status.changed"; status: "idle" | "running" | "waiting" | "error" } + | { type: "error"; error: ChatError } +``` + +Notes on the AI-SDK-aligned choices: + +- **`messageId` + `partIndex`** — `UIMessage.parts` is an ordered array, so parts are identified by position within a message, matching how `ai-elements` renders them. If we later need a stable `PartId` we can add it in `UIPart.metadata`, but it's not needed for the reducer to work. +- **`message.part.delta` uses `field: "text" | "reasoning"`** — both `TextUIPart` and `ReasoningUIPart` expose a string body that deltas append to. Tool parts (`ToolUIPart`) don't stream via delta — they emit `message.part.updated` events as their `state` transitions (`input-streaming → input-available → output-available | output-error`), which matches AI SDK v6's own tool state machine. +- **`message.appended` carries the full `UIMessage`** — cheap and unambiguous for initial user message insertion or for snapshot replay. Subsequent streaming uses `part.appended` / `part.delta` / `part.updated` to avoid re-sending the whole message. + +`seq` is per-session. Using per-session (not global) because: +- The harness emits per-session already; adding a global counter introduces a cross-session lock. +- Multi-device replay is always scoped to one session anyway. +- Matches s2.dev's per-stream sequence model cleanly. + +Workspace-level events (`session.created`, `session.metadata.changed`, `session.deleted`) ride a *separate* per-workspace log with its own `seq`. Analogous to t3code's shell vs detail split. + +### Commands (client → server, tRPC mutations) + +```ts +chat.session.sendMessage({ commandId, sessionId, text, attachments? 
}) +chat.session.answerApproval({ commandId, sessionId, requestId, reply: "accept" | "acceptForSession" | "decline" | "cancel", message? }) +chat.session.answerQuestion({ commandId, sessionId, requestId, answer: string }) +chat.session.interrupt({ commandId, sessionId }) +``` + +Every command carries a client-generated `commandId` (ULID). Server checks `CommandReceipts` before acting — retries after reconnect are free. The returned value is trivial (`{ ok: true, seqAfter }`) so the UI doesn't depend on it; all real state arrives on the subscription. + +### Subscriptions + +```ts +chat.workspace.watch(workspaceId) -> stream of WorkspaceEvent // sidebar +chat.session.watch({ sessionId, sinceSeq? }) -> stream of ChatEvent // open chat +``` + +`sinceSeq` is optional. If omitted, server sends a `snapshot` envelope first (`{ snapshot: ProjectedState, seqAfter: number }`), then live events. If `sinceSeq` is passed and is still in the server's replay window, server streams from there. If it's older than the window, server falls back to snapshot. + +### Replay + +```ts +chat.session.replayEvents({ sessionId, fromSeq, toSeq? }) -> ChatEvent[] +``` + +Called only when the client detects a gap (received `seq = N+2` while holding `latestSeq = N`). Subscription stream carries recent events; this query fills holes. Server implementation just reads the log. + +## Client-side design + +One Zustand store per currently-open session, plus one shared workspace store. Zustand is already the house pattern (direct dep in `packages/panes` and `apps/desktop`, ~20+ existing stores in `apps/desktop/src/renderer/stores/`), so no new primitives. 
+ +Store shape uses `UIMessage` directly so `ai-elements` can render it without translation: + +```ts +import type { UIMessage } from "ai" + +interface SessionState { + status: "connecting" | "idle" | "running" | "waiting" | "error" + messages: Record<UIMessage["id"], UIMessage> // authoritative per-message state + messageOrder: UIMessage["id"][] // insertion order for rendering + pendingApprovals: Record<ApprovalId, { tool: string; args: unknown }> + pendingQuestions: Record<QuestionId, { prompt: string }> + currentTurn: { turnId: TurnId; messageId: UIMessage["id"] } | null + latestSeq: number + highestSeen: number + pendingBuffer: ChatEvent[] // events received out of order +} + +const useSessionStore = (sessionId: SessionId) => create((set, get) => ({ + // ... initial state ... + + applyEvent(event: ChatEvent) { + // Pure reducer. Switch on event.type: + // message.appended → messages[id] = event.message, messageOrder.push(id) + // message.part.appended→ messages[id].parts[partIndex] = event.part + // message.part.delta → messages[id].parts[partIndex][field] += event.delta + // message.part.updated → messages[id].parts[partIndex] = event.part (tool state transitions) + // message.completed → (nothing — message already reflects terminal state) + // turn.started/.completed → currentTurn = … / status = … + // approval.requested / question.requested → add to pendingApprovals / pendingQuestions + // status.changed → status = event.status + }, + + onReceive(event: ChatEvent) { + // 1. update highestSeen + // 2. if seq == latestSeq + 1: apply, drain pendingBuffer + // 3. if seq > latestSeq + 1: push to pendingBuffer, fire chat.session.replayEvents + // 4. 
if seq <= latestSeq: ignore (duplicate / reconnect overlap) + }, +})) +``` + +Selectors replace `useChatDisplay`: + +```ts +const messages = useSessionStore(sessionId, s => s.messageOrder.map(id => s.messages[id])) // UIMessage[] +const isRunning = useSessionStore(sessionId, s => s.status === "running") +const pendingApproval = useSessionStore(sessionId, s => firstOf(s.pendingApprovals)) +``` + +Because `messages` is already `UIMessage[]`, it flows straight into existing `ai-elements` components (``, ``, etc.). No adapter layer. + +This kills `withoutActiveTurnAssistantHistory` — the active assistant message is just the most-recent entry in `messages` whose parts are still mutating. There's no duplication between `currentMessage` and history to reconcile. + +**Per-frame coalescing.** Borrow from opencode: if `message.part.delta` events arrive faster than the browser can render, batch them and flush once per `requestAnimationFrame`. For non-delta events, apply immediately. Straight in the `onReceive` path, not in the reducer. + +**Why not `useChat` from `@ai-sdk/react`?** `useChat` holds `UIMessage[]` state and knows how to apply text deltas, but its mental model is "this client initiated this turn, this client owns the stream." Turns in our system can originate from another device; events arrive for approvals/questions/interrupts that `useChat` has no concept of; we need `sinceSeq` replay on reconnect; multiple open tabs share one session. Bending `useChat` to that model is strictly more work than owning the reducer — and the reducer over `UIMessage` is ~150 lines. 
+ +## Host-service side + +`packages/host-service/src/runtime/chat/` grows an `EventBridge` alongside the existing `ChatRuntimeManager`: + +```ts +interface EventLog<TEvent> { + append(streamId: string, event: TEvent): Promise<{ seq: number }> + readFrom(streamId: string, fromSeq: number, toSeq?: number): Promise<TEvent[]> + subscribe(streamId: string, fromSeq?: number): AsyncIterable<TEvent> +} +``` + +Two implementations from day 1: +- `LocalEventLog` — in-memory ring buffer (N events or M minutes) with a SQLite durable backing store. Default. +- `S2EventLog` — `s2.dev` client (or whichever durable stream provider we pick). Swapped in via config when running against cloud. + +The `EventBridge`: + +1. Subscribes to `harness.subscribe()` for the session. +2. Translates raw harness events into typed `ChatEvent`s, adding `seq` (from the log) and `causationId`. +3. Appends to the `EventLog`. +4. Synthesizes `user_message_submitted` **before** calling the harness (fixes the gap called out in `chat-mastra-rebuild-execplan.md`), so user messages and assistant responses share the same ordering guarantee. +5. Serializes appends per session with an async queue — the bug call-out in the rebuild plan. + +tRPC router changes: + +- `chat.session.watch` becomes a `.subscription()` over WebSocket (host-service already has `@hono/node-ws`). +- `chat.session.replayEvents` is a regular query. +- Existing `chat.session.sendMessage` mutation wraps the `EventBridge` submission path with `CommandReceipts` dedup. +- `getDisplayState` and `listMessages` are **deleted**. If any internal code still needs a point-in-time snapshot, it reads from the projection (see §Persistence). 
+ +## Message send flow, end-to-end + +```mermaid +sequenceDiagram + autonumber + participant User + participant Store as Client store (Zustand) + participant WS as tRPC over WS + participant Router as chatRouter (host) + participant Receipts as CommandReceipts + participant Bridge as EventBridge + participant Log as EventLog + participant Harness as Mastracode harness + + Note over Store,Log: session.watch subscription already open
client latestSeq = N + + User->>Store: submit "do X" + Store->>Store: optimistic message.appended
(local-only, pending confirm) + Store->>WS: mutation sendMessage
{ commandId, sessionId, text } + WS->>Router: mutation + + Router->>Receipts: lookup(commandId) + alt commandId already seen (retry) + Receipts-->>Router: prior result + Router-->>WS: { ok: true, seqAfter } + WS-->>Store: resolve (no-op, event already applied) + else new command + Router->>Bridge: submit(user message) + Bridge->>Log: append(message.appended{user}) + Log-->>Bridge: seq = N+1 + Bridge->>Log: append(turn.started) + Log-->>Bridge: seq = N+2 + Log-->>WS: fan-out events N+1, N+2 + WS-->>Store: onReceive(N+1) → reconcile optimistic
onReceive(N+2) → status = "running" + Bridge->>Harness: sendTurn() + Router->>Receipts: store(commandId → N+2) + Router-->>WS: { ok: true, seqAfter: N+2 } + WS-->>Store: mutation resolved + end + + loop while turn is running + Harness->>Bridge: runtime event
(token delta · tool call · approval · …) + Bridge->>Bridge: translate → ChatEvent
(stamp causationId) + Bridge->>Log: append(event) + Log-->>Bridge: seq = N+k + Log-->>WS: fan-out + WS-->>Store: onReceive(event) + + alt seq == latestSeq + 1 + Store->>Store: applyEvent
(deltas coalesce to rAF flush) + else seq > latestSeq + 1 (gap) + Store->>Store: push to pendingBuffer + Store->>WS: query replayEvents(fromSeq, toSeq) + WS->>Log: readFrom(fromSeq, toSeq) + Log-->>WS: missing events + WS-->>Store: events + Store->>Store: apply each, drain buffer + else seq <= latestSeq (duplicate / overlap) + Store->>Store: ignore + end + end + + Harness->>Bridge: turn complete + Bridge->>Log: append(message.completed, turn.completed) + Log-->>WS: fan-out + WS-->>Store: apply → status = "idle" + Store-->>User: render final assistant message +``` + +Reading notes: + +- **Steps 1-3.** Optimistic user message lands in the store immediately — no round-trip wait for the first pixel. It gets reconciled (not replaced) when the server-authored `message.appended` arrives at step 15 with the real `seq` and server-authored `messageId`. +- **Steps 4-8.** `commandId` dedup makes the whole mutation idempotent. A flaky network can retry `sendMessage` all day; the server runs the turn exactly once. +- **Steps 16 onwards.** The subscription is the hot path for everything that happens after submission. The mutation's `{ ok: true, seqAfter }` return is a hint, not the data; the UI never blocks on it. +- **Gap branch (steps 27-32).** This is the reconnect / out-of-order-delivery path. Client detects `seq > latestSeq + 1`, buffers the new event, fetches the missing slice, applies in order. Same mechanism handles "backgrounded for 10 minutes, WS died" as "one dropped packet." +- **Tool approvals / user questions** follow the same loop — `approval.requested` is just another event on the stream. The user's reply is a separate `answerApproval` mutation (not shown), which produces an `approval.responded` event that unblocks the harness. + +Every box outside of `Harness` is code we write. `Harness` is unchanged Mastracode. + +## Persistence + +The event log is the wire protocol. Storage is separate. 
+ +Short-term: SQLite table `chat_events (stream_id, seq, event_json, occurred_at)` with index on `(stream_id, seq)`. Projection tables (messages, sessions) rebuilt on startup by replaying the log, cached in memory for reads. + +This mirrors t3code but lighter — we only need projections where we need fast server-side reads, not for every aggregate. The `messages` projection probably matters; `pendingApprovals` doesn't (keep it in memory). + +Long-term (cloud): the `EventLog` interface gets a second implementation backed by a cloud-shared durable store (Postgres on Neon, by default), or alternatively the whole control plane moves to Cloudflare Durable Objects. Both are covered in detail below. + +The *client* never sees persistence directly — it always talks to `EventLog` through the tRPC/WS surface. + +## Cloud-backed `EventLog`: two paths + +Once we want cross-device visibility (same session on laptop + phone simultaneously) or a cloud-hosted agent runtime, `LocalEventLog` on host-service's SQLite is no longer enough — the log has to be reachable from any process that might own a runtime or serve a subscription. We have two credible paths; they are genuinely different and worth choosing between with eyes open. + +### Path A — Postgres-backed EventLog (`PostgresEventLog`) + +Stay on the existing stack (Neon). Add one table and adapt the `EventLog` implementation: + +```sql +CREATE TABLE chat_events ( + stream_id TEXT NOT NULL, + seq BIGINT NOT NULL, + event_json JSONB NOT NULL, + occurred_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (stream_id, seq) +); +CREATE INDEX chat_events_stream_time_idx ON chat_events (stream_id, occurred_at); +``` + +- `append()` → `INSERT` + `pg_notify('chat_session_<sessionId>', ...)` for live fan-out. +- `readFrom()` → `SELECT WHERE stream_id = $1 AND seq BETWEEN $2 AND $3`. +- `subscribe()` → open a `LISTEN`, stream rows as they arrive. If many subscribers per host is painful, bounce through an in-process broadcaster per session. 
+- Per-session write ordering → an in-process async queue in whichever host-service or cloud worker currently owns the session's writes, plus a lease row in Postgres (`chat_session_ownership`) to prevent two processes racing on the same session. +- Command dedup → same `CommandReceipts` table we already have. + +Who owns the `EventLog`: + +- Host-service continues to own its sessions locally and writes events to both `LocalEventLog` (SQLite) and `PostgresEventLog` (for cross-device visibility). +- When host-service isn't reachable, a cloud worker can claim the lease and take over writes. + +**What this buys:** cross-device read, cross-device replay, cloud-runtime handoff, and no new vendor. Everything builds on Neon, which is already in the stack. + +**What you still build:** +- Lease-based session ownership across processes. +- Per-session write serialisation inside whichever process holds the lease. +- A subscription-fan-out tier (probably host-service's tRPC WS server, or a parallel cloud Node service). +- Idle-cost story: any Node process holding N WebSockets for idle sessions is paying to be idle. + +### Path B — Durable Objects as the whole control plane + +Adopt Cloudflare Workers + Durable Objects. The `EventLog` *and* the subscription transport *and* the session-ownership story all collapse into a single primitive: one DO per session. 
+ +``` +Clients (phone / web / desktop renderer) + │ WebSocket direct to SessionDO + ▼ +┌─────────────────────── Cloudflare ──────────────────────────┐ +│ │ +│ Stateless Worker │ +│ auth · ws-token mint · routing │ +│ │ +│ SessionDO (one per sessionId) │ +│ SQLite storage (events · messages · command_receipts) │ +│ WebSocket hub (browsers + agent runtime) │ +│ Single-threaded — ordering free, no locks │ +│ Hibernates when idle — near-zero cost per idle session │ +│ │ +│ WorkspaceDO (one per workspaceId) │ +│ sidebar index + session-list events │ +│ │ +│ D1 (global) │ +│ users · workspaces · session directory · encrypted creds │ +└─────────────────────────────────────────────────────────────┘ + +Laptop host-service (unchanged filesystem + Mastracode ownership) + ▲ + │ WebSocket connects to SessionDO as a "runtime participant" + │ subscribes for user messages, runs turns, streams events back + +Cloud runtime (Modal/Daytona/Fly container, spun up on handoff) + ▲ + │ Same "runtime participant" role; same protocol +``` + +Shifts vs. Path A: + +- Event log = the DO's per-session SQLite. No Postgres table for events. +- Transport = browsers connect WebSocket directly to the DO via a CF Worker. No host-service-hosted tRPC subscription server for chat. +- Session ownership = platform-guaranteed. Every request for `session/abc-123` goes to the same DO instance. No lease table. +- Per-session write serialisation = free. DOs are single-threaded. +- Fan-out = free. The DO owns the WebSockets and broadcasts natively. +- Idle cost = near-zero. DO hibernation holds WebSockets open while the compute sleeps. +- Host-service role = **downgrades from "source of truth + runtime" to "runtime participant only."** It connects to the SessionDO like any other client, listens for user messages, runs turns, streams events back. It no longer owns the chat event log; SessionDO does. +- Relay role = not used for chat (browsers hit DOs directly). Still used for filesystem tools and terminal. 
+ +**What this buys on top of Path A:** +- Multi-device fan-out as a platform feature (no custom broker). +- Ownership coordination as a platform feature (no lease protocol). +- Hibernation as a platform feature (thousands of idle sessions effectively free). +- Handoff between devices / runtimes is essentially free — both laptop and cloud worker just subscribe to the same SessionDO. +- Multiplayer (multiple humans in one session) is essentially free if ever wanted. + +**What it costs:** +- **New cloud vendor.** Cloudflare enters the stack with its own deployment tooling (Wrangler), observability story, secrets model, and debugging surface. +- **Lock-in.** DOs are Cloudflare-specific. Portable to no other cloud without a rewrite. +- **Chat becomes cloud-dependent.** Host-service's local SQLite is no longer authoritative. Laptop-offline-but-chatting stops working unless we build a local-cache shadow layer. Most teams going DO-native accept this; it's a real regression worth being explicit about. +- **Agent still runs elsewhere.** DO CPU limits prevent Mastracode turns from running inside the DO. Runtime is still host-service (or cloud worker on handoff). + +### Decision framing + +The `EventLog` interface (defined in P0) fits both paths without modification, so nothing in P0-P4 is gated on which cloud path we pick. When we do pick: + +- Path A (Postgres) is the default if we want to stay on our current vendors, are willing to write the ownership + fan-out + idle-cost code ourselves, and don't need the platform-level multi-device ergonomics DOs provide. +- Path B (DOs) is the default if we're open to Cloudflare in the stack and want to shave several piles of custom infrastructure in exchange for that lock-in. + +Both are real choices. See §Phased migration — P5a and P5b — for what shipping each one actually involves. + +## Phased migration + +Phases are sequential. 
P0-P1 can interleave a little; P2 depends on P1; P3 depends on P2; P4 is pure cleanup; P5 is independent from P4 (can ship before or after). + +### Blockers to resolve before P0 + +These are the three questions that will bite us if we skip them: + +- [ ] **Decide `seq` ownership: in-memory counter vs. log-assigned.** Recommendation: log-assigned, returned from `append()`. Keeps `EventBridge` single-source and avoids a second counter to keep in sync. +- [ ] **Resolve provider credential scoping** for renderer ↔ host-service direct WS (the open question from `host-service-chat-architecture.md`). If we don't, P1 stalls. +- [ ] **Pick the event-type ownership location.** Recommendation: a new `packages/chat-protocol` (schema + TypeScript types only, no runtime) importable by host-service, renderer, mobile, and future cloud worker. Current `packages/trpc` is router-level, not protocol-level. + +### P0 — Protocol & EventLog interface (server-side only, no wire changes) + +**Goal:** define the contract and the local implementation, verified by a parity test against the current polling output. + +- [ ] Create `packages/chat-protocol` with: + - [ ] `ChatEvent` envelope type (seq, eventId, sessionId, workspaceId, occurredAt, commandId, causationId). + - [ ] `EventPayload` union (using AI SDK `UIMessage` and its part types — no invented message types). + - [ ] `WorkspaceEvent` union for the sidebar stream. + - [ ] Command input schemas (`sendMessage`, `answerApproval`, `answerQuestion`, `interrupt`) with `commandId: string` required. + - [ ] Zod schemas alongside TypeScript types (so tRPC inputs validate). +- [ ] Define `EventLog` interface in `packages/chat-protocol`: + - [ ] `append(streamId, event): Promise<{ seq }>` + - [ ] `readFrom(streamId, fromSeq, toSeq?): Promise<ChatEvent[]>` + - [ ] `subscribe(streamId, fromSeq?): AsyncIterable<ChatEvent>` + - [ ] `snapshot(streamId): Promise<{ state, seq }>` — serves snapshot-on-subscribe.
+- [ ] Implement `LocalEventLog` in `packages/host-service/src/runtime/chat/event-log/`: + - [ ] In-memory ring buffer (default: last 500 events + 15 min, configurable). + - [ ] SQLite durable backing table `chat_events(stream_id, seq, event_json, occurred_at)` via existing Drizzle setup. + - [ ] `subscribe()` implemented as a hot async iterable with pull-based backpressure. +- [ ] Implement `EventBridge` in `packages/host-service/src/runtime/chat/`: + - [ ] Wrap each `RuntimeSession`'s `harness.subscribe()` in a translator that produces typed `ChatEvent`s. + - [ ] Per-session serialized async queue (fixes the ordering bug flagged in `chat-mastra-rebuild-execplan.md`). + - [ ] Synthesize `user_message_submitted` / `message.appended` events *before* calling into the harness so the log ordering is deterministic. + - [ ] Stamp `causationId` where a new event chains from an earlier one. +- [ ] Implement `CommandReceipts` in host-service: + - [ ] SQLite table `command_receipts(command_id PK, session_id, result_seq, created_at)`. + - [ ] Dedup middleware on mutations: if `commandId` exists, return the stored `{ ok: true, seqAfter }` without re-executing. + - [ ] TTL sweep (e.g. 24h). +- [ ] Parity harness: + - [ ] Test that runs a recorded session (user message → tool calls → assistant response) through `EventBridge`, then projects the resulting log through a reducer. + - [ ] Asserts the projected state equals the current `getDisplayState()` + `listMessages()` output for the same inputs. + +**Acceptance:** projection-from-log matches polling-output byte-for-byte on a corpus of ≥5 recorded sessions, including an approval flow and an interrupt. + +### P1 — tRPC surface on host-service (transport, still no client changes) + +**Goal:** expose the event log as typed tRPC subscriptions and replay query. Old polling procedures still work. 
+ +- [ ] Add tRPC subscription procedures in `packages/host-service/src/trpc/router/chat/`: + - [ ] `chat.session.watch({ sessionId, sinceSeq? })` → `Observable<ChatEvent>`. + - [ ] `chat.workspace.watch({ workspaceId, sinceSeq? })` → `Observable<WorkspaceEvent>`. +- [ ] Add the replay query: + - [ ] `chat.session.replayEvents({ sessionId, fromSeq, toSeq? })` → `ChatEvent[]`. +- [ ] Wire subscriptions onto `@hono/node-ws` in host-service's app.ts (the terminal route already shows the pattern). +- [ ] Add typed mutations with `commandId` on every input: + - [ ] `chat.session.sendMessage` + - [ ] `chat.session.answerApproval` + - [ ] `chat.session.answerQuestion` + - [ ] `chat.session.interrupt` + - Each wraps the equivalent existing mutation, adds `CommandReceipts` dedup, and returns `{ ok: true, seqAfter }`. +- [ ] Leave `getDisplayState` / `listMessages` intact for now. +- [ ] Write a Node test client (checked in under `packages/host-service/test/`): + - [ ] Connects via WS, subscribes, sends `sendMessage`, asserts expected event sequence arrives. + - [ ] Drops the WS mid-stream, reconnects with `sinceSeq`, asserts no duplicates and no gaps. + - [ ] Simulates a gap (forces server to skip), asserts client-side replay call fills it. + - [ ] Double-submits the same `commandId`, asserts only one effect. + +**Acceptance:** test client green across all four scenarios. No existing chat code has changed. + +### P2 — Client store, reducer, and gap detector (client-side, no UI swap yet) + +**Goal:** everything a UI component would need to render chat off the event stream, shipped as a drop-in hook. + +- [ ] Create `packages/chat/src/client/session-store/` (or similar; can also live in `apps/desktop/src/renderer/stores/chat-session/` if it stays desktop-only — recommend the package to keep web/mobile aligned): + - [ ] Zustand store factory `createSessionStore(sessionId)` with the shape defined in §Client-side design.
+ - [ ] Pure reducer `applyEvent(state, event)` covering every `EventPayload` variant. Use Immer or structural-clone helpers; keep it obviously pure. + - [ ] `onReceive(event)` with gap detection (`seq vs latestSeq`), pendingBuffer, dedup on `seq <= latestSeq`. + - [ ] Replay trigger: on gap, fires `chat.session.replayEvents`, applies result, drains buffer. +- [ ] Per-frame coalescer: + - [ ] Batch `message.part.delta` events by `(messageId, partIndex, field)` and flush on `requestAnimationFrame`. + - [ ] Non-delta events apply synchronously. +- [ ] Subscription hook `useChatSessionSubscription(sessionId)`: + - [ ] Opens the tRPC subscription, wires events into `onReceive`. + - [ ] Handles WS disconnect → reopen with `sinceSeq = latestSeq + 1`. + - [ ] Surfaces `status: "connecting" | "live" | "replaying" | "error"` for UI affordances. +- [ ] Workspace store (analogous, smaller) for the sidebar. +- [ ] Compatibility shim `useChatDisplay_v2(sessionId)` that exposes the same selector keys today's `useChatDisplay` returns (`messages`, `isRunning`, `currentMessage`, etc.) — this makes P3 a flag flip rather than a rewrite. +- [ ] Unit tests: + - [ ] Apply a recorded event stream to the reducer, snapshot the resulting state. + - [ ] Fuzz: randomized ordering with one missing event, assert store converges to canonical state after replay. + - [ ] Coalescer: 1000 deltas, assert at most 60 flushes per second. + +**Acceptance:** reducer tests green; compatibility shim renders an identical `ChatPane` against a scripted event stream in a Storybook story. + +### P3 — Swap UI consumers + +**Goal:** chat in the app is driven by the event stream; old polling is gated off. + +- [ ] Add a feature flag `chat.useEventStream` (off by default). +- [ ] Swap ChatPane in v2-workspace to use `useChatDisplay_v2` under the flag. +- [ ] Swap any other `useChatDisplay` consumers (grep: `apps/desktop/src/renderer/**/useChatDisplay`). 
+- [ ] Dogfood on the v2-chat-architecture branch for ≥1 week across desktop. +- [ ] QA matrix: + - [ ] Golden path: send message, get response, render tokens. + - [ ] Tool approval flow. + - [ ] Mid-turn interrupt. + - [ ] Reconnect during active turn. + - [ ] Two windows open on the same session (should converge). + - [ ] Rapid consecutive messages (no lost events). +- [ ] Flip the flag default to on once QA is green; keep the flag for two releases as an escape hatch. + +**Acceptance:** no regressions in the chat QA matrix vs. current main for two full releases. + +### P4 — Delete the old surface + +**Goal:** one code path for chat, not two. + +- [ ] Remove the `chat.useEventStream` flag (and any dead branches it gated). +- [ ] Delete `getDisplayState` and `listMessages` tRPC procedures. +- [ ] Delete `packages/chat/src/client/hooks/use-chat-display/`. +- [ ] Delete `withoutActiveTurnAssistantHistory` and related helpers. +- [ ] Delete the legacy surface in `packages/chat/src/server/trpc/service.ts` (the desktop-only tRPC service). Host-service is the only owner — this finishes Phase 3 of `host-service-chat-architecture.md`. +- [ ] Remove `@superset/chat/client/provider` re-exports that nothing else imports. +- [ ] Update `AGENTS.md` / relevant docs to point at the new surface. + +**Acceptance:** zero references to the deleted surface in `apps/` or `packages/` (excluding `temp/`). CI green. + +### P5 — Cloud-backed EventLog (pick A or B; independent of P4) + +P5 is where cross-device visibility and cloud-runtime handoff become possible. Two alternative paths — choose one; they're not additive. See §Cloud-backed `EventLog` above for the comparison. + +#### P5a — Postgres-backed EventLog (stay on current stack) + +**Goal:** ship `PostgresEventLog` behind the existing `EventLog` interface so host-service and cloud processes can read/write the same session log. 
+ +- [ ] Add Drizzle migration for `chat_events(stream_id, seq, event_json, occurred_at)` with `PRIMARY KEY (stream_id, seq)` and a secondary index on `(stream_id, occurred_at)`. +- [ ] Add `chat_session_ownership(stream_id, owner_id, lease_expires_at)` for single-writer ownership. +- [ ] Implement `PostgresEventLog`: + - [ ] `append` → INSERT + `pg_notify('chat_stream_' || stream_id, seq)`. + - [ ] `readFrom` / `snapshot` → range SELECT. + - [ ] `subscribe` → `LISTEN` + in-process fan-out to multiple local subscribers on the same process. + - [ ] Ownership lease acquisition + heartbeat; lease-loss callback so the losing owner stops writing immediately. +- [ ] Dual-write for host-service: `LocalEventLog` stays the primary when host-service is reachable; `PostgresEventLog` mirrors events for cross-device visibility. (Alternative: make Postgres authoritative and drop `LocalEventLog` — simpler, but breaks laptop-offline chat.) +- [ ] Stand up a thin cloud "chat node" (small Fly / Cloudflare Worker service) that serves subscriptions to browsers when host-service is unreachable. Reads from `PostgresEventLog`; forwards writes to whichever process holds the lease. +- [ ] Per-environment config toggle for which `EventLog` implementation each process uses. +- [ ] Multi-device end-to-end test: desktop + web attached to the same session, events converge identically across both; simulated network partition on one device. +- [ ] Operational runbook: lease renewal, stuck-lease recovery, replay-window sizing, disaster recovery. +- [ ] Capacity / cost model: events per session per day × session count × retention window against Neon's pricing. + +**Acceptance:** two clients on the same session converge to identical state after a scripted sequence with a simulated network partition on one side. + +#### P5b — Durable-Objects-native control plane (Cloudflare) + +**Goal:** replace the cloud subscription tier entirely by making each session live in its own Cloudflare Durable Object. 
Host-service becomes a runtime participant connecting to the DO instead of a source of truth. + +- [ ] Stand up a new Cloudflare Workers deployment with Durable Objects bindings and a D1 database. +- [ ] Implement `SessionDO`: + - [ ] SQLite storage (events, messages queue, command_receipts, participants, ws_client_mapping). + - [ ] `fetch` handler for tRPC-style mutations (sendMessage, answerApproval, answerQuestion, interrupt). + - [ ] `webSocketMessage` handler for live client messages (subscribe, prompt, stop, typing, presence). + - [ ] Fan-out helper that iterates connected WSs on every event append. + - [ ] Hibernation enabled; `ws_client_mapping` persists `wsId → participantId` for rehydration. +- [ ] Implement `WorkspaceDO` for per-workspace sidebar events. +- [ ] Implement `D1` schema for users, workspaces, session directory, encrypted credentials. +- [ ] Stateless auth Worker: validate OAuth / JWT, mint WS tokens (short TTL), route to the correct DO. +- [ ] Adapt host-service to connect to `SessionDO` as a runtime participant: + - [ ] Open a long-lived WS with a runtime-auth-token (issued per-session). + - [ ] Subscribe to user-message events; feed them to Mastracode. + - [ ] Write harness events back to the DO via the same WS. + - [ ] Handle "another runtime claimed this session" eviction gracefully. +- [ ] Adapt clients (browser, mobile) to open WebSockets directly to `SessionDO` via the Worker surface, not through host-service / the relay. +- [ ] Decide local-offline story: accept that chat requires cloud connectivity, OR ship a `LocalCacheEventLog` shadow layer that mirrors the DO to host-service's SQLite and serves reads when offline. Recommend starting with "accept cloud dependency" and adding local cache only if users push back. +- [ ] Migrate the chat portion of the relay out — browser traffic for chat stops going through the relay. Filesystem / terminal traffic stays on the relay. 
+- [ ] Multi-device end-to-end test: desktop + web + phone on the same session, convergent state, simulated partition, simulated laptop-off with cloud worker handoff. +- [ ] Operational runbook: DO storage limits, wrangler deploy process, observability + logging, secrets rotation, Cloudflare-specific failure modes. +- [ ] Capacity / cost model: request count × DO-hours × storage × egress. + +**Acceptance:** same as P5a, plus one additional scenario: session active on laptop → laptop disconnects → phone continues seeing the event stream without any latency beyond DO wake-from-hibernation. + +**Note on going from P5a to P5b later:** the `EventLog` interface is implementation-agnostic, so moving from Postgres to DOs *is* possible later, but it's not a drop-in swap — P5b specifically changes who owns the transport (browser connects direct to DO, not through host-service), which is a bigger shift than just swapping the storage. If there's any chance we'll go DO-native, pick it the first time through P5. + +### P6 — Device handoff (turn-boundary + git courier) + +**Goal:** when the primary runtime (laptop host-service) becomes unreachable, subsequent turns can be served by a cloud worker without losing session continuity or uncommitted work. Not mid-turn — turn-boundary only. + +Depends on P5 (either P5a or P5b) — the event log has to be cloud-reachable. + +- [ ] Add event types to the protocol: + - [ ] `runtime_registered { runtimeId, kind: "host" | "cloud", capabilities }` — emitted when a runtime attaches to a session. + - [ ] `runtime_unregistered { runtimeId, reason }` — emitted on graceful disconnect or heartbeat timeout. + - [ ] `handoff_ready { branchName, commitSha, fromRuntimeId }` — emitted when a runtime stashes in-progress work for handoff. + - [ ] `turn_interrupted { turnId, reason }` — emitted when a turn is abandoned mid-flight due to runtime loss. +- [ ] Runtime ownership protocol: + - [ ] Only one `runtime_registered` is active per session at a time. 
+ - [ ] Ownership renewal via heartbeat events every N seconds. + - [ ] New runtime can claim after heartbeat timeout + grace window. +- [ ] Laptop host-service handoff trigger: + - [ ] On graceful shutdown (lid close via macOS power notification, quit, explicit "handoff" command): stage + commit dirty workspace files to `superset/handoff/`, push, emit `handoff_ready`. + - [ ] On ungraceful loss (network drop + grace window expires): cloud coordinator declares the runtime dead, emits `turn_interrupted` if a turn was mid-flight. +- [ ] On-demand cloud runtime spawn: + - [ ] Coordinator service watches sessions with pending user messages and no active runtime. + - [ ] Spawns a Modal/Daytona/Fly-container sandbox, clones the repo, checks out the handoff branch (or `main` if no handoff branch), runs `.superset/setup.sh` if present. + - [ ] New cloud runtime registers with the session via `runtime_registered`, picks up the pending message, runs the turn, streams events back. +- [ ] Return-to-laptop flow: + - [ ] Host-service reconnects, sees later events in the log authored by a cloud runtime. + - [ ] Pulls any ephemeral branches the cloud runtime pushed (e.g. `superset/cloud//`). + - [ ] UI surface: "your session ran in the cloud while you were away; here's the diff — merge or discard?" +- [ ] Speculative runtime warming: when phone sends `typing` and no local runtime is reachable, emit a warming signal so the cloud coordinator starts spawning a sandbox speculatively. Hides cold-start latency. +- [ ] Honest limitations to document: + - [ ] Mid-turn handoff not supported — interrupted turns are lost, user re-prompts. + - [ ] Untracked / gitignored files are lost on handoff (only committed state travels). + - [ ] Long-running processes (dev server, watch modes) started inside a turn don't survive the handoff. +- [ ] Tests: laptop-closes-mid-session convergence; return-to-laptop merge UX; simultaneous runtime-claim race; cloud runtime timeout + re-spawn. 
+ +**Acceptance:** user can close laptop mid-chat, continue from phone, see cloud-run turns land in real time, return to laptop and pull cloud-authored commits without conflict in the golden path. + +### P7 (speculative) — Event-sourced agent for mid-turn handoff + +Deliberately not in scope for the first pass. Covered in conversation because it keeps coming up. + +Seamless mid-turn handoff requires the agent's in-flight state (LLM stream position, partially-applied tool results) to be reconstructible from the event log, which in turn requires the agent to emit *intent* events before acting and to be designed around replay-safe tools. That's a ~quarter-scale of work on top of P6 and produces a quality regression on re-prompted LLM streams. Worth revisiting only if long autonomous runs + device handoff become first-class product requirements. + +### Summary timeline + +Rough sizing (for a single engineer, not including meetings / reviews): + +| Phase | Est. | Can parallelize? | Ships behind flag? | +|-------|------|------------------|--------------------| +| Blockers | 1-2 days | — | n/a | +| P0 | 1-2 weeks | No | n/a (no wire change) | +| P1 | 1 week | Partial with P0 end | n/a (additive) | +| P2 | 1-2 weeks | After P1 | n/a | +| P3 | 1 week + bake time | After P2 | Yes | +| P4 | 2-3 days | After P3 bake | n/a | +| P5a (Postgres) | 2-3 weeks | Any time after P2 | Yes (config) | +| P5b (Durable Objects) | 4-6 weeks | Any time after P2; mutually exclusive with P5a | Yes (config) | +| P6 (handoff) | 2-3 weeks | After P5 | Yes | +| P7 (event-sourced agent) | quarter-scale | Speculative / not on critical path | n/a | + +P0-P4 is the critical path for killing the race condition and unblocking same-machine multi-window. P5 is where cross-device + cloud-runtime becomes possible; P5a and P5b are alternatives, not additive. P6 delivers the close-laptop-continue-on-phone handoff UX on top of whichever P5 path was chosen. 
P7 is explicitly not on the roadmap — listed so we don't accidentally rediscover why it's hard. + +## Open questions + +- **P5 path: Postgres or Durable Objects?** The single biggest deferred decision. Postgres keeps us on our current vendors and requires more custom infra (ownership leases, fan-out tier, idle-cost story). DOs replace several layers with platform primitives but bring Cloudflare into the stack and make chat cloud-dependent. Worth resolving before P5 starts, but no earlier — P0-P4 is the same either way. +- **Local-offline chat in the DO path.** If we go P5b, do we accept that chat requires internet, or build a `LocalCacheEventLog` shadow layer that mirrors the DO? Recommend starting without it and adding only if users surface friction. +- **Ownership of `commandId` dedup window.** Keep `CommandReceipts` in our own durable store regardless of P5 path — cleaner semantics, survives backend swaps, less magic. +- **Subscription granularity for workspace stream.** One subscription per workspace or one per user-across-workspaces? Recommend per-workspace — matches the current sidebar scope and keeps events small. +- **Backpressure.** If a slow client can't keep up with token deltas, do we drop, coalesce, or disconnect? Recommend server-side (or DO-side) ring-per-subscriber with coalesce-latest-wins on delta events, hard-drop on sustained overflow with an `error` event sent down the wire. +- **Mobile background reconnect.** When iOS backgrounds the app for 20 minutes, the WS dies. On resume: reopen + `sinceSeq` + replay. Should be free with this design, but worth an explicit test plan. +- **Provider credential scoping for renderer-direct connection.** The open question from `host-service-chat-architecture.md` still applies — if the renderer talks to host-service WS directly (P5a) or a DO directly (P5b), provider creds need to be scoped cleanly. Resolve before P1. +- **Handoff workspace state.** P6 uses git as the filesystem courier.
Fine for committed state; what's our story for untracked files the agent wrote but hadn't committed? Options: auto-stash to a handoff branch, snapshot workspace to object storage, or accept the loss. Resolve before P6. + +## Summary + +tRPC subscriptions over the WebSocket we already have, wrapping an `EventLog` abstraction. `LocalEventLog` on SQLite for the local case. Two cloud options: **`PostgresEventLog` on Neon (P5a)** to stay on current vendors, or **Durable Objects (P5b)** to fold event log + subscription transport + session ownership + hibernation into one Cloudflare primitive at the cost of vendor lock-in. Per-session event streams with monotonic `seq`, `commandId`-keyed idempotent mutations, `replayEvents` on gap, and a Zustand reducer over `UIMessage` that applies events in order. Two subscription scopes — workspace for the sidebar, session for the open chat — each writing to a disjoint region of client state. On top of P5, a **P6 handoff flow** (turn-boundary, git as filesystem courier, on-demand cloud runtime) covers close-laptop-continue-on-phone. Mid-turn handoff is explicitly out of scope. Everything above is justification.