diff --git a/.gitignore b/.gitignore index 9f925b63328..a53981f4ee8 100644 --- a/.gitignore +++ b/.gitignore @@ -100,3 +100,6 @@ test-conflict-repo/ # Claude Code session lock (runtime artifact) .claude/scheduled_tasks.lock temp/ + +# Local-only plans (not tracked) +plans/local/ diff --git a/apps/desktop/package.json b/apps/desktop/package.json index 4077a3f8e18..47874033952 100644 --- a/apps/desktop/package.json +++ b/apps/desktop/package.json @@ -2,7 +2,7 @@ "name": "@superset/desktop", "productName": "Superset", "description": "The last developer tool you'll ever need", - "version": "1.5.10", + "version": "1.6.2", "main": "./dist/main/index.js", "resources": "src/resources", "repository": { diff --git a/apps/desktop/src/renderer/components/Chat/ChatInterface/components/ChatInputFooter/ChatInputFooter.tsx b/apps/desktop/src/renderer/components/Chat/ChatInterface/components/ChatInputFooter/ChatInputFooter.tsx index 7e5a0eee5a0..f3637958d85 100644 --- a/apps/desktop/src/renderer/components/Chat/ChatInterface/components/ChatInputFooter/ChatInputFooter.tsx +++ b/apps/desktop/src/renderer/components/Chat/ChatInterface/components/ChatInputFooter/ChatInputFooter.tsx @@ -1,3 +1,4 @@ +import { chatServiceTrpc } from "@superset/chat/client"; import { PromptInput, PromptInputAttachment, @@ -115,6 +116,34 @@ export function ChatInputFooter({ setLinkedIssues((prev) => prev.filter((issue) => issue.slug !== slug)); }, []); + const trpcUtils = chatServiceTrpc.useUtils(); + const searchFiles = useCallback( + async (query: string) => { + const results = await trpcUtils.workspace.searchFiles.fetch({ + rootPath: cwd, + query, + includeHidden: false, + limit: 20, + }); + return results.map((r) => ({ + id: r.id, + name: r.name, + relativePath: r.relativePath, + })); + }, + [trpcUtils, cwd], + ); + const previewSlashCommand = useCallback( + async (text: string) => { + const result = await trpcUtils.workspace.previewSlashCommand.fetch({ + cwd, + text, + }); + return result ?? 
null; + }, + [trpcUtils, cwd], + ); + const handleSend = useCallback( (message: PromptInputMessage) => { if (linkedIssues.length === 0) return onSend(message); @@ -182,6 +211,8 @@ export function ChatInputFooter({ /> Promise; + interface SlashCommandPreviewPopoverProps { cwd: string; + previewSlashCommand: PreviewSlashCommandFn; slashCommands: Array<{ name: string; aliases: string[]; @@ -24,6 +33,7 @@ interface SlashCommandPreviewPopoverProps { export function SlashCommandPreviewPopover({ cwd, + previewSlashCommand, slashCommands, editor, isFocused, @@ -59,15 +69,21 @@ export function SlashCommandPreviewPopover({ const parsedInput = useMemo(() => parseSlashInput(inputValue), [inputValue]); const debouncedSlashPreviewInput = useDebouncedValue(slashPreviewInput, 120); - const { data: slashPreview } = - chatServiceTrpc.workspace.previewSlashCommand.useQuery( - { cwd, text: debouncedSlashPreviewInput }, - { - enabled: debouncedSlashPreviewInput.length > 1 && !!cwd, - staleTime: 250, - placeholderData: (previous) => previous, - }, - ); + const [slashPreview, setSlashPreview] = useState(null); + useEffect(() => { + if (debouncedSlashPreviewInput.length <= 1 || !cwd) return; + let cancelled = false; + previewSlashCommand(debouncedSlashPreviewInput) + .then((result) => { + if (!cancelled) setSlashPreview(result); + }) + .catch(() => { + // Empty preview on error — popover degrades gracefully. 
+ }); + return () => { + cancelled = true; + }; + }, [cwd, debouncedSlashPreviewInput, previewSlashCommand]); const commandDefinition = useMemo(() => { if (!parsedInput?.commandName) return null; diff --git a/apps/desktop/src/renderer/components/Chat/ChatInterface/components/TiptapPromptEditor/TiptapPromptEditor.tsx b/apps/desktop/src/renderer/components/Chat/ChatInterface/components/TiptapPromptEditor/TiptapPromptEditor.tsx index e7ea31c55f5..bde280ae7df 100644 --- a/apps/desktop/src/renderer/components/Chat/ChatInterface/components/TiptapPromptEditor/TiptapPromptEditor.tsx +++ b/apps/desktop/src/renderer/components/Chat/ChatInterface/components/TiptapPromptEditor/TiptapPromptEditor.tsx @@ -1,4 +1,3 @@ -import { chatServiceTrpc } from "@superset/chat/client"; import { usePromptInputAttachments, usePromptInputController, @@ -41,10 +40,14 @@ import { SlashCommandMenu } from "../SlashCommandMenu"; import { FileMentionNode } from "./FileMentionNode"; import { parseTextToEditorContent } from "./parseTextToEditorContent"; import { SlashCommandNode } from "./SlashCommandNode"; -import { SlashCommandPreviewPopover } from "./SlashCommandPreviewPopover"; +import { + type PreviewSlashCommandFn, + SlashCommandPreviewPopover, +} from "./SlashCommandPreviewPopover"; import { serializeEditorToText } from "./serializeEditorToText"; type FileResult = { id: string; name: string; relativePath: string }; +type SearchFilesFn = (query: string) => Promise; type SlashMenuState = { commands: SlashCommand[]; @@ -61,6 +64,8 @@ type MentionState = { export interface TiptapPromptEditorProps { cwd: string; + searchFiles: SearchFilesFn; + previewSlashCommand?: PreviewSlashCommandFn; slashCommands: SlashCommand[]; availableModels?: ModelOption[]; placeholder?: string; @@ -76,6 +81,8 @@ function getDirectoryPath(relativePath: string): string { export function TiptapPromptEditor({ cwd, + searchFiles, + previewSlashCommand, slashCommands, availableModels, placeholder = "Ask to make changes, 
@mention files, run /commands", @@ -139,28 +146,25 @@ export function TiptapPromptEditor({ mentionState?.query ?? "", 120, ); - const { data: fileResults } = chatServiceTrpc.workspace.searchFiles.useQuery( - { - rootPath: cwd, - query: debouncedMentionQuery, - includeHidden: false, - limit: 20, - }, - { - enabled: - !!mentionState && - !!cwd && - debouncedMentionQuery.length > 0 && - (mentionState?.query?.length ?? 0) > 0, - staleTime: 1000, - placeholderData: (prev) => prev ?? [], - }, - ); + const isMentionVisible = + mentionState !== null && (mentionState?.query?.length ?? 0) > 0; + const [fileResults, setFileResults] = useState([]); + useEffect(() => { + if (!isMentionVisible || !cwd || debouncedMentionQuery.length === 0) return; + let cancelled = false; + searchFiles(debouncedMentionQuery) + .then((results) => { + if (!cancelled) setFileResults(results); + }) + .catch(() => { + // Empty results on error — mention popup degrades gracefully. + }); + return () => { + cancelled = true; + }; + }, [debouncedMentionQuery, cwd, isMentionVisible, searchFiles]); - const mentionFiles: FileResult[] = - mentionState && (mentionState.query?.length ?? 0) > 0 - ? (fileResults ?? []) - : []; + const mentionFiles: FileResult[] = isMentionVisible ? fileResults : []; const mentionFilesRef = useRef(mentionFiles); mentionFilesRef.current = mentionFiles; @@ -634,10 +638,13 @@ export function TiptapPromptEditor({ return ( <> - {/* Slash command params popover — anchored to the chip node */} - {editor && ( + {/* Slash command params popover — anchored to the chip node. + Only rendered when the parent provides a previewSlashCommand + function; v2 ChatPane uses its own SlashCommandPreview instead. */} + {editor && previewSlashCommand && ( ["chat"]["getSlashCommands"] + >, + ) => + commands.map((command) => ({ + ...command, + kind: + command.kind === "builtin" + ? ("builtin" as const) + : ("custom" as const), + source: + command.kind === "builtin" + ? 
("builtin" as const) + : ("project" as const), + })), + [], + ); + const { data: slashCommands = [] } = workspaceTrpc.chat.getSlashCommands.useQuery( - { sessionId: sessionId ?? "", workspaceId }, - { - enabled: Boolean(sessionId), - select: (commands) => - commands.map((command) => ({ - ...command, - kind: - command.kind === "builtin" - ? ("builtin" as const) - : ("custom" as const), - source: - command.kind === "builtin" - ? ("builtin" as const) - : ("project" as const), - })), - }, + { workspaceId }, + { select: selectSlashCommands }, ); const chat = useChatDisplay({ sessionId, workspaceId, enabled: Boolean(sessionId), - fps: 60, }); const { commands, @@ -368,38 +378,20 @@ export function ChatPaneInterface({ const sendMessageToSession = useCallback( async (targetSessionId: string, input: ChatSendMessageInput) => { - const queryInput = { + // Optimistic state for this path lives in `pendingUserTurn` (set by + // the caller in handleSend), NOT in the snapshot cache. Writing to + // the cache here was racing with the 4fps snapshot polls — a poll + // could resolve mid-mutation with the harness's pre-message state + // and clobber the optimistic write, making the user message vanish + // briefly. The pendingUserTurn local state is merged in via + // getVisibleMessagesWithPendingUserTurn so it survives stale polls. 
+ await sendMessageMutation.mutateAsync({ sessionId: targetSessionId, workspaceId, - }; - const optimisticMessage = toOptimisticUserMessage(input); - if (optimisticMessage) { - workspaceTrpcUtils.chat.listMessages.setData( - queryInput, - (existingMessages = []) => [...existingMessages, optimisticMessage], - ); - } - - try { - await sendMessageMutation.mutateAsync({ - sessionId: targetSessionId, - workspaceId, - ...input, - }); - } catch (error) { - if (optimisticMessage) { - workspaceTrpcUtils.chat.listMessages.setData( - queryInput, - (existingMessages = []) => - existingMessages.filter( - (message) => message.id !== optimisticMessage.id, - ), - ); - } - throw error; - } + ...input, + }); }, - [workspaceTrpcUtils.chat.listMessages, sendMessageMutation, workspaceId], + [sendMessageMutation, workspaceId], ); const canAbort = Boolean(isRunning); @@ -641,7 +633,25 @@ export function ChatPaneInterface({ if (sessionId && targetSessionId === sessionId) { await commands.sendMessage(sendInput); } else { - await sendMessageToSession(targetSessionId, sendInput); + // New-session path: the existing-session path's optimistic + // state lives inside useChatDisplay, but we don't have a + // session subscribed there yet. Hold the user message in + // pendingUserTurn so getVisibleMessagesWithPendingUserTurn + // keeps it visible across stale snapshot polls until the + // harness's response includes it. 
+ const optimisticMessage = toOptimisticUserMessage(sendInput); + if (optimisticMessage) { + setPendingUserTurn({ + kind: "append", + message: optimisticMessage, + }); + } + try { + await sendMessageToSession(targetSessionId, sendInput); + } catch (error) { + setPendingUserTurn(null); + throw error; + } } if (content) { onUserMessageSubmitted?.(content); diff --git a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/components/ChatInputFooter/ChatInputFooter.tsx b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/components/ChatInputFooter/ChatInputFooter.tsx index fdc29ff275b..26a1f078721 100644 --- a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/components/ChatInputFooter/ChatInputFooter.tsx +++ b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/components/ChatInputFooter/ChatInputFooter.tsx @@ -6,6 +6,7 @@ import { usePromptInputController, } from "@superset/ui/ai-elements/prompt-input"; import type { ThinkingLevel } from "@superset/ui/ai-elements/thinking-toggle"; +import { workspaceTrpc } from "@superset/workspace-client"; import type { ChatStatus, FileUIPart } from "ai"; import type React from "react"; import type { ReactNode } from "react"; @@ -29,7 +30,6 @@ import type { LinkedIssue } from "./types"; import { getErrorMessage } from "./utils/getErrorMessage"; interface ChatInputFooterProps { - sessionId: string | null; workspaceId: string; cwd: string; isFocused: boolean; @@ -66,7 +66,6 @@ interface ChatInputFooterProps { } export function ChatInputFooter({ - sessionId, workspaceId, cwd, isFocused, @@ -120,6 +119,24 
@@ export function ChatInputFooter({ setLinkedIssues((prev) => prev.filter((issue) => issue.slug !== slug)); }, []); + const trpcUtils = workspaceTrpc.useUtils(); + const searchFiles = useCallback( + async (query: string) => { + const { matches } = await trpcUtils.filesystem.searchFiles.fetch({ + workspaceId, + query, + includeHidden: false, + limit: 20, + }); + return matches.map((m) => ({ + id: m.absolutePath, + name: m.name, + relativePath: m.relativePath, + })); + }, + [trpcUtils, workspaceId], + ); + const handleSend = useCallback( (message: PromptInputMessage) => { if (linkedIssues.length === 0) return onSend(message); @@ -192,12 +209,12 @@ export function ChatInputFooter({ onRemove={removeLinkedIssue} /> (null); useEffect(() => { - if (!sessionId || debouncedSlashPreviewInput.length <= 1) { + if (debouncedSlashPreviewInput.length <= 1) { setSlashPreview(null); return; } @@ -80,7 +78,6 @@ export function SlashCommandPreview({ let cancelled = false; void previewSlashCommand .mutateAsync({ - sessionId, workspaceId, text: debouncedSlashPreviewInput, }) @@ -107,7 +104,7 @@ export function SlashCommandPreview({ return () => { cancelled = true; }; - }, [debouncedSlashPreviewInput, previewSlashCommand, sessionId, workspaceId]); + }, [debouncedSlashPreviewInput, previewSlashCommand, workspaceId]); const commandDefinition = useMemo(() => { if (!parsedInput?.commandName) return null; diff --git a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/components/ModelPicker/ModelPicker.tsx b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/components/ModelPicker/ModelPicker.tsx index c8ddf0eb44e..d9b03ee2a41 100644 --- 
a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/components/ModelPicker/ModelPicker.tsx +++ b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/components/ModelPicker/ModelPicker.tsx @@ -1,4 +1,3 @@ -import { chatServiceTrpc } from "@superset/chat/client"; import { ModelSelector, ModelSelectorContent, @@ -10,6 +9,7 @@ import { } from "@superset/ui/ai-elements/model-selector"; import { PromptInputButton } from "@superset/ui/ai-elements/prompt-input"; import { claudeIcon } from "@superset/ui/icons/preset-icons"; +import { workspaceTrpc } from "@superset/workspace-client"; import { useNavigate } from "@tanstack/react-router"; import { ChevronDownIcon } from "lucide-react"; import { useEffect, useMemo } from "react"; @@ -43,9 +43,9 @@ export function ModelPicker({ ? providerToLogo(selectedModel.provider) : null; const { data: anthropicStatus, refetch: refetchAnthropicStatus } = - chatServiceTrpc.auth.getAnthropicStatus.useQuery(); + workspaceTrpc.auth.getAnthropicStatus.useQuery(); const { data: openAIStatus, refetch: refetchOpenAIStatus } = - chatServiceTrpc.auth.getOpenAIStatus.useQuery(); + workspaceTrpc.auth.getOpenAIStatus.useQuery(); useEffect(() => { if (!open) return; diff --git a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/hooks/useSlashCommandExecutor/useSlashCommandExecutor.ts b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/hooks/useSlashCommandExecutor/useSlashCommandExecutor.ts index a89531f1073..d2f74cec0b7 100644 --- 
a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/hooks/useSlashCommandExecutor/useSlashCommandExecutor.ts +++ b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/components/WorkspaceChatInterface/hooks/useSlashCommandExecutor/useSlashCommandExecutor.ts @@ -9,6 +9,7 @@ import { findModelByQuery, normalizeModelQueryFromActionArgument, } from "./model-query"; +import { resolveSlashPromptResult } from "./prompt-result"; interface UseSlashCommandExecutorOptions { sessionId: string | null; @@ -49,6 +50,8 @@ export function useSlashCommandExecutor({ onTrackEvent, }: UseSlashCommandExecutorOptions) { const workspaceTrpcUtils = workspaceTrpc.useUtils(); + const { mutateAsync: resolveSlashCommandMutateAsync } = + workspaceTrpc.chat.resolveSlashCommand.useMutation(); const resolveSlashCommandInput = useCallback( async (inputText: string): Promise => { @@ -57,21 +60,6 @@ export function useSlashCommandExecutor({ return { handled: false, nextText: text }; } - if (!sessionId) { - if (text === "/new" || text === "/clear") { - onClearError(); - await onResetSession(); - toast.success( - text === "/clear" - ? "Context cleared in a new chat session" - : "Started a new chat session", - ); - return { handled: true, nextText: "" }; - } - - return { handled: false, nextText: text }; - } - try { const [commandNameRaw, ...rest] = text.slice(1).split(/\s+/); const commandName = commandNameRaw?.toLowerCase() ?? ""; @@ -164,8 +152,41 @@ export function useSlashCommandExecutor({ }); return { handled: true, nextText: "" }; } - default: - return { handled: false, nextText: text }; + default: { + // Custom slash command — resolve via host-service so prompts + // from .claude/commands and .agents/commands get substituted. + // Workspace-scoped: works whether or not a session exists yet. 
+ const resolved = await resolveSlashCommandMutateAsync({ + workspaceId, + text, + }); + if (!resolved.handled) { + return { handled: false, nextText: text }; + } + const promptResolution = resolveSlashPromptResult({ + handled: resolved.handled, + prompt: resolved.prompt, + commandName: resolved.commandName, + invokedAs: resolved.invokedAs, + }); + if (promptResolution.errorMessage) { + onSetErrorMessage(promptResolution.errorMessage); + toast.error(promptResolution.errorMessage); + return { handled: true, nextText: "" }; + } + onClearError(); + if (promptResolution.handled) { + onTrackEvent?.("chat_slash_command_used", { + command_name: + resolved.invokedAs ?? resolved.commandName ?? commandName, + command_type: "prompt", + }); + } + return { + handled: promptResolution.handled, + nextText: promptResolution.nextText, + }; + } } } catch (error) { console.warn( @@ -189,6 +210,7 @@ export function useSlashCommandExecutor({ loadMcpOverview, onResetSession, onStopActiveResponse, + resolveSlashCommandMutateAsync, sessionId, workspaceId, workspaceTrpcUtils.chat.getMcpOverview, diff --git a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/hooks/useWorkspaceChatController/useWorkspaceChatController.ts b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/hooks/useWorkspaceChatController/useWorkspaceChatController.ts index 6aaa701f9e1..763b1c02e4c 100644 --- a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/hooks/useWorkspaceChatController/useWorkspaceChatController.ts +++ b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/hooks/useWorkspaceChatController/useWorkspaceChatController.ts @@ -68,6 +68,7 @@ export function useWorkspaceChatController({ 
session?.session?.activeOrganizationId, ); const collections = useCollections(); + const endSessionMutation = workspaceTrpc.chat.endSession.useMutation(); const { chatSessions: chatSessionActions } = useOptimisticCollectionActions(); const { data: workspace } = workspaceTrpc.workspace.get.useQuery( @@ -105,6 +106,11 @@ export function useWorkspaceChatController({ if (!transaction && !isDesktopChatDevMode()) { throw new Error("Failed to delete chat session"); } + // Tear down the host-service in-memory runtime so it doesn't leak. + // Failures here must not block the user-visible delete. + void endSessionMutation + .mutateAsync({ sessionId: sessionIdToDelete, workspaceId }) + .catch(() => {}); posthog.capture("chat_session_deleted", { workspace_id: workspaceId, @@ -117,6 +123,7 @@ export function useWorkspaceChatController({ }, [ chatSessionActions, + endSessionMutation, onSessionIdChange, organizationId, sessionId, diff --git a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/hooks/useWorkspaceChatDisplay/useWorkspaceChatDisplay.ts b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/hooks/useWorkspaceChatDisplay/useWorkspaceChatDisplay.ts index fb3965347ff..2c42d1dfc82 100644 --- a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/hooks/useWorkspaceChatDisplay/useWorkspaceChatDisplay.ts +++ b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/components/ChatPane/hooks/useWorkspaceChatDisplay/useWorkspaceChatDisplay.ts @@ -19,8 +19,9 @@ type RouterInputs = inferRouterInputs; type RouterOutputs = inferRouterOutputs; type ChatInputs = RouterInputs["chat"]; type ChatOutputs = RouterOutputs["chat"]; -type DisplayStateOutput = ChatOutputs["getDisplayState"]; -type ListMessagesOutput = 
ChatOutputs["listMessages"]; +type SnapshotOutput = ChatOutputs["getSnapshot"]; +type DisplayStateOutput = SnapshotOutput["displayState"]; +type ListMessagesOutput = SnapshotOutput["messages"]; type HistoryMessage = ListMessagesOutput[number]; type HistoryMessagePart = HistoryMessage["content"][number]; type SendMessageInput = ChatInputs["sendMessage"]; @@ -62,7 +63,7 @@ function withoutActiveTurnAssistantHistory({ isRunning, }: { messages: ListMessagesOutput; - currentMessage: NonNullable["currentMessage"] | null; + currentMessage: DisplayStateOutput["currentMessage"] | null; isRunning: boolean; }): ListMessagesOutput { if (!isRunning || !currentMessage || currentMessage.role !== "assistant") { @@ -120,12 +121,7 @@ export function useChatDisplay(options: UseChatDisplayOptions) { refetchOnWindowFocus: false, } as const; - const displayQuery = workspaceTrpc.chat.getDisplayState.useQuery( - queryInput as { sessionId: string; workspaceId: string }, - queryOptions, - ); - - const messagesQuery = workspaceTrpc.chat.listMessages.useQuery( + const snapshotQuery = workspaceTrpc.chat.getSnapshot.useQuery( queryInput as { sessionId: string; workspaceId: string }, queryOptions, ); @@ -138,7 +134,8 @@ export function useChatDisplay(options: UseChatDisplayOptions) { workspaceTrpc.chat.respondToQuestion.useMutation(); const respondToPlanMutation = workspaceTrpc.chat.respondToPlan.useMutation(); - const displayState = displayQuery.data ?? null; + const snapshot = snapshotQuery.data ?? null; + const displayState = snapshot?.displayState ?? null; const runtimeErrorMessage = typeof displayState?.errorMessage === "string" && displayState.errorMessage.trim() @@ -148,9 +145,9 @@ export function useChatDisplay(options: UseChatDisplayOptions) { const isRunning = displayState?.isRunning ?? false; const isConversationLoading = isQueryEnabled && - messagesQuery.data === undefined && - (messagesQuery.isLoading || messagesQuery.isFetching); - const historicalMessages = messagesQuery.data ?? 
[]; + snapshotQuery.data === undefined && + (snapshotQuery.isLoading || snapshotQuery.isFetching); + const historicalMessages = snapshot?.messages ?? []; const latestAssistantErrorMessage = isRunning ? null : findLatestAssistantErrorMessage(historicalMessages); @@ -355,8 +352,7 @@ export function useChatDisplay(options: UseChatDisplayOptions) { error: runtimeErrorMessage ?? latestAssistantErrorMessage ?? - displayQuery.error ?? - messagesQuery.error ?? + snapshotQuery.error ?? commandError ?? null, commands, diff --git a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/usePaneRegistry.tsx b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/usePaneRegistry.tsx index ef7e01d6d87..2f3ca1ab9fc 100644 --- a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/usePaneRegistry.tsx +++ b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/hooks/usePaneRegistry/usePaneRegistry.tsx @@ -41,6 +41,7 @@ import { } from "../../state/fileDocumentStore"; import type { BrowserPaneData, + ChatPaneData, CommentPaneData, DevtoolsPaneData, FilePaneData, @@ -48,6 +49,7 @@ import type { TerminalPaneData, } from "../../types"; import { BrowserPane, BrowserPaneToolbar } from "./components/BrowserPane"; +import { ChatPane } from "./components/ChatPane"; import { CommentPane } from "./components/CommentPane"; import { DiffPane } from "./components/DiffPane"; import { FilePane } from "./components/FilePane"; @@ -535,13 +537,22 @@ export function usePaneRegistry( /> ), - // Disabled until ChatServiceProvider is wired above v2 panes — - // TiptapPromptEditor needs its tRPC context. - renderPane: (_ctx: RendererContext) => ( -
- Chat pane is temporarily disabled. -
- ), + renderPane: (ctx: RendererContext) => { + const data = ctx.pane.data as ChatPaneData; + return ( + + ctx.actions.updateData({ ...data, sessionId: id }) + } + initialLaunchConfig={data.launchConfig ?? null} + onConsumeLaunchConfig={() => + ctx.actions.updateData({ ...data, launchConfig: null }) + } + /> + ); + }, contextMenuActions: (_ctx, defaults) => defaults.map((d) => d.key === "close-pane" ? { ...d, label: "Close Chat" } : d, diff --git a/bun.lock b/bun.lock index 1aedc17e7dd..c3edc0df2a0 100644 --- a/bun.lock +++ b/bun.lock @@ -110,7 +110,7 @@ }, "apps/desktop": { "name": "@superset/desktop", - "version": "1.5.10", + "version": "1.6.2", "dependencies": { "@ai-sdk/anthropic": "^3.0.43", "@ai-sdk/openai": "3.0.36", diff --git a/packages/chat/src/server/desktop/index.ts b/packages/chat/src/server/desktop/index.ts index 06df62dddd6..390d2dc1a8d 100644 --- a/packages/chat/src/server/desktop/index.ts +++ b/packages/chat/src/server/desktop/index.ts @@ -16,4 +16,9 @@ export { export { ChatService } from "./chat-service"; export type { ChatServiceRouter } from "./router"; export { createChatServiceRouter } from "./router"; +export type { SlashCommand } from "./slash-commands"; +export { + getSlashCommands, + resolveSlashCommand, +} from "./slash-commands"; export { generateTitleFromMessage } from "./title-generation"; diff --git a/packages/host-service/src/app.ts b/packages/host-service/src/app.ts index 6545be5a280..f374d28390e 100644 --- a/packages/host-service/src/app.ts +++ b/packages/host-service/src/app.ts @@ -1,6 +1,7 @@ import { createNodeWebSocket } from "@hono/node-ws"; import { trpcServer } from "@hono/trpc-server"; import { Octokit } from "@octokit/rest"; +import { ChatService } from "@superset/chat/server/desktop"; import type { MiddlewareHandler } from "hono"; import { Hono } from "hono"; import { cors } from "hono/cors"; @@ -69,8 +70,13 @@ export function createApp(options: CreateAppOptions): CreateAppResult { db, runtimeResolver: 
providers.modelResolver, }); + // Provider auth (Anthropic / OpenAI OAuth + API keys) is per-machine, not + // per-workspace. ChatService is a long-lived singleton wrapping mastra's + // auth storage; the `host.auth.*` router proxies to it. + const chatService = new ChatService(); const runtime = { + auth: chatService, chat: chatRuntime, filesystem, pullRequests: pullRequestRuntime, diff --git a/packages/host-service/src/runtime/chat/chat.ts b/packages/host-service/src/runtime/chat/chat.ts index ed9f83bb871..a434dfd31a1 100644 --- a/packages/host-service/src/runtime/chat/chat.ts +++ b/packages/host-service/src/runtime/chat/chat.ts @@ -1,5 +1,9 @@ +import { + getSlashCommands as getSlashCommandsFromCwd, + resolveSlashCommand as resolveSlashCommandFromCwd, +} from "@superset/chat/server/desktop"; import { eq } from "drizzle-orm"; -import { createMastraCode } from "mastracode"; +import { createAuthStorage, createMastraCode } from "mastracode"; import type { HostDb } from "../../db"; import { workspaces } from "../../db/schema"; import type { ModelProviderRuntimeResolver } from "../../providers/model-providers"; @@ -272,6 +276,35 @@ function toRuntimeErrorMessage(error: unknown): string { return "Unexpected chat error"; } +/** + * Pick the model mastra should use for observational-memory reflection + * (a background task that runs after each turn). Mastra's default is + * google/gemini-2.5-flash, which fails when GOOGLE_GENERATIVE_AI_API_KEY + * is unset — we fall back to whichever provider the user has actually + * authenticated with so reflection just uses those credentials. 
+ */ +function resolveOmModelFromAuth(): string | undefined { + if (process.env.GOOGLE_GENERATIVE_AI_API_KEY) + return "google/gemini-2.5-flash"; + const authStorage = createAuthStorage(); + authStorage.reload(); + const anthropic = authStorage.get("anthropic"); + if ( + anthropic?.type === "oauth" || + (anthropic?.type === "api_key" && anthropic.key.trim()) + ) { + return "anthropic/claude-haiku-4-5"; + } + const openai = authStorage.get("openai-codex"); + if ( + openai?.type === "oauth" || + (openai?.type === "api_key" && openai.key.trim()) + ) { + return "openai/gpt-4.1-nano"; + } + return undefined; +} + async function getRuntimeMemoryStore( runtime: RuntimeSession, ): Promise { @@ -354,13 +387,18 @@ async function restartRuntimeFromUserMessage( await runtime.harness.sendMessage(input.payload); } +interface InflightRuntimeCreation { + workspaceId: string; + promise: Promise; +} + export class ChatRuntimeManager { private readonly db: HostDb; private readonly runtimeResolver: ModelProviderRuntimeResolver; private readonly runtimes = new Map(); private readonly runtimeCreations = new Map< string, - Promise + InflightRuntimeCreation >(); constructor(options: ChatRuntimeManagerOptions) { @@ -420,9 +458,16 @@ export class ChatRuntimeManager { await this.runtimeResolver.prepareRuntimeEnv(); + const omModel = resolveOmModelFromAuth(); const runtime = await createMastraCode({ cwd, disableMcp: true, + ...(omModel && { + initialState: { + observerModelId: omModel, + reflectorModelId: omModel, + }, + }), }); runtime.hookManager?.setSessionId(sessionId); await runtime.harness.init(); @@ -462,24 +507,79 @@ export class ChatRuntimeManager { const inflight = this.runtimeCreations.get(sessionId); if (inflight) { - return inflight; + if (inflight.workspaceId !== workspaceId) { + throw new Error( + `Session ${sessionId} is already being created for workspace ${inflight.workspaceId}`, + ); + } + return inflight.promise; } - const creation = this.createRuntime(sessionId, 
workspaceId).finally(() => { + const promise = this.createRuntime(sessionId, workspaceId).finally(() => { this.runtimeCreations.delete(sessionId); }); - this.runtimeCreations.set(sessionId, creation); - return creation; + this.runtimeCreations.set(sessionId, { workspaceId, promise }); + return promise; } - async getDisplayState(input: { - sessionId: string; - workspaceId: string; - }): Promise { - const runtime = await this.getOrCreateRuntime( - input.sessionId, - input.workspaceId, - ); + /** + * Tear down the in-memory runtime for a session. Aborts any in-flight + * work, disconnects MCP servers, removes the runtime from the manager's + * map, and is a no-op for unknown session ids. Should be called after + * the cloud session row is deleted, or when a workspace is deleted. + * + * Validates `workspaceId` against the runtime / in-flight creation so a + * caller can't dispose a session bound to a different workspace. + * + * If a creation is in-flight for this session, awaits it first so the + * just-created runtime doesn't get inserted into `runtimes` after we + * delete from it (which would leak). + */ + async disposeRuntime(sessionId: string, workspaceId: string): Promise { + const inflight = this.runtimeCreations.get(sessionId); + if (inflight) { + if (inflight.workspaceId !== workspaceId) { + throw new Error( + `Session ${sessionId} is being created for workspace ${inflight.workspaceId}`, + ); + } + try { + await inflight.promise; + } catch { + // Creation failed — nothing to dispose. 
+ return; + } + } + + const runtime = this.runtimes.get(sessionId); + if (!runtime) return; + + if (runtime.workspaceId !== workspaceId) { + throw new Error( + `Session ${sessionId} is bound to workspace ${runtime.workspaceId}`, + ); + } + + try { + runtime.harness.abort(); + } catch { + // best-effort — proceed with cleanup even if abort fails + } + try { + await runtime.mcpManager?.disconnect(); + } catch { + // best-effort — MCP servers may already be disconnected + } + this.runtimes.delete(sessionId); + } + + /** + * Shape the harness's raw display state into the shape the renderer + * expects. Both getDisplayState and getSnapshot must apply the same + * shaping — keep this the single source of truth so the two functions + * cannot drift. + */ + private buildDisplayState(runtime: RuntimeSession): ChatDisplayState { const displayState = runtime.harness.getDisplayState(); const currentMessage = displayState.currentMessage as { role?: string; @@ -522,6 +622,17 @@ export class ChatRuntimeManager { }; } + async getDisplayState(input: { + sessionId: string; + workspaceId: string; + }): Promise { + const runtime = await this.getOrCreateRuntime( + input.sessionId, + input.workspaceId, + ); + return this.buildDisplayState(runtime); + } + async listMessages(input: { sessionId: string; workspaceId: string; @@ -533,6 +644,36 @@ export class ChatRuntimeManager { return runtime.harness.listMessages(); } + /** + * Single server-side observation that returns both displayState and messages + * from one runtime acquisition. This avoids the dual-poll race between + * independent getDisplayState / listMessages queries on the client. + * + * Note: not a fully locked atomic snapshot — listMessages() is async, so + * harness state can change between the displayState read and the messages + * read. This still removes the *client-side* two-query race, which is the + * one that caused mismatched message/display state. 
+ */ + async getSnapshot(input: { + sessionId: string; + workspaceId: string; + }): Promise<{ + displayState: ChatDisplayState; + messages: RuntimeMessages; + }> { + const runtime = await this.getOrCreateRuntime( + input.sessionId, + input.workspaceId, + ); + const displayState = this.buildDisplayState(runtime); + const messages = await runtime.harness.listMessages(); + // Intentionally no observedAt: when the harness state hasn't changed, + // the response object is structurally identical to the previous poll's + // response, so React Query's structuralSharing preserves the object + // identity and idle polls don't trigger downstream rerenders. + return { displayState, messages }; + } + async sendMessage( input: ChatSendMessageInput, ): Promise { @@ -612,39 +753,41 @@ export class ChatRuntimeManager { return runtime.harness.respondToPlanApproval(input.payload); } - async getSlashCommands(_input: { - sessionId: string; - workspaceId: string; - }): Promise< + private resolveWorkspaceCwd(workspaceId: string): string { + const workspace = this.db.query.workspaces + .findFirst({ where: eq(workspaces.id, workspaceId) }) + .sync(); + if (!workspace) { + throw new Error(`Workspace not found: ${workspaceId}`); + } + return workspace.worktreePath; + } + + async getSlashCommands(input: { workspaceId: string }): Promise< Array<{ name: string; aliases: string[]; description: string; argumentHint: string; - kind: "builtin" | "prompt"; + kind: "builtin" | "custom"; }> > { - return []; + const cwd = this.resolveWorkspaceCwd(input.workspaceId); + return getSlashCommandsFromCwd(cwd).map((command) => ({ + name: command.name, + aliases: command.aliases, + description: command.description, + argumentHint: command.argumentHint, + kind: command.kind, + })); } - async resolveSlashCommand(input: { - sessionId: string; - workspaceId: string; - text: string; - }) { - return { - handled: false, - invokedAs: input.text.trim().startsWith("/") - ? 
input.text.trim() - : undefined, - }; + async resolveSlashCommand(input: { workspaceId: string; text: string }) { + const cwd = this.resolveWorkspaceCwd(input.workspaceId); + return resolveSlashCommandFromCwd(cwd, input.text); } - async previewSlashCommand(input: { - sessionId: string; - workspaceId: string; - text: string; - }) { + async previewSlashCommand(input: { workspaceId: string; text: string }) { return this.resolveSlashCommand(input); } diff --git a/packages/host-service/src/trpc/router/auth/auth.ts b/packages/host-service/src/trpc/router/auth/auth.ts new file mode 100644 index 00000000000..5e45b377abd --- /dev/null +++ b/packages/host-service/src/trpc/router/auth/auth.ts @@ -0,0 +1,83 @@ +import { z } from "zod"; +import { protectedProcedure, router } from "../../index"; + +const anthropicOAuthCodeInput = z.object({ + code: z.string().min(1), +}); +const openAIOAuthCodeInput = z.object({ + code: z.string().optional(), +}); +const anthropicApiKeyInput = z.object({ + apiKey: z.string().min(1), +}); +const openAIApiKeyInput = z.object({ + apiKey: z.string().min(1), +}); +const anthropicEnvConfigInput = z.object({ + envText: z.string(), +}); + +export const authRouter = router({ + getAnthropicStatus: protectedProcedure.query(({ ctx }) => { + return ctx.runtime.auth.getAnthropicAuthStatus(); + }), + startAnthropicOAuth: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.startAnthropicOAuth(); + }), + completeAnthropicOAuth: protectedProcedure + .input(anthropicOAuthCodeInput) + .mutation(({ ctx, input }) => { + return ctx.runtime.auth.completeAnthropicOAuth({ code: input.code }); + }), + cancelAnthropicOAuth: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.cancelAnthropicOAuth(); + }), + disconnectAnthropicOAuth: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.disconnectAnthropicOAuth(); + }), + setAnthropicApiKey: protectedProcedure + .input(anthropicApiKeyInput) + .mutation(({ ctx, input }) => { + 
return ctx.runtime.auth.setAnthropicApiKey({ apiKey: input.apiKey }); + }), + clearAnthropicApiKey: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.clearAnthropicApiKey(); + }), + getAnthropicEnvConfig: protectedProcedure.query(({ ctx }) => { + return ctx.runtime.auth.getAnthropicEnvConfig(); + }), + setAnthropicEnvConfig: protectedProcedure + .input(anthropicEnvConfigInput) + .mutation(({ ctx, input }) => { + return ctx.runtime.auth.setAnthropicEnvConfig({ envText: input.envText }); + }), + clearAnthropicEnvConfig: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.clearAnthropicEnvConfig(); + }), + + getOpenAIStatus: protectedProcedure.query(({ ctx }) => { + return ctx.runtime.auth.getOpenAIAuthStatus(); + }), + startOpenAIOAuth: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.startOpenAIOAuth(); + }), + completeOpenAIOAuth: protectedProcedure + .input(openAIOAuthCodeInput) + .mutation(({ ctx, input }) => { + return ctx.runtime.auth.completeOpenAIOAuth({ code: input.code }); + }), + cancelOpenAIOAuth: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.cancelOpenAIOAuth(); + }), + disconnectOpenAIOAuth: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.disconnectOpenAIOAuth(); + }), + setOpenAIApiKey: protectedProcedure + .input(openAIApiKeyInput) + .mutation(({ ctx, input }) => { + return ctx.runtime.auth.setOpenAIApiKey({ apiKey: input.apiKey }); + }), + clearOpenAIApiKey: protectedProcedure.mutation(({ ctx }) => { + return ctx.runtime.auth.clearOpenAIApiKey(); + }), +}); diff --git a/packages/host-service/src/trpc/router/auth/index.ts b/packages/host-service/src/trpc/router/auth/index.ts new file mode 100644 index 00000000000..4f82de4a3ba --- /dev/null +++ b/packages/host-service/src/trpc/router/auth/index.ts @@ -0,0 +1 @@ +export { authRouter } from "./auth"; diff --git a/packages/host-service/src/trpc/router/chat/chat.ts 
b/packages/host-service/src/trpc/router/chat/chat.ts index 5e639ef0fa5..c6debcbe747 100644 --- a/packages/host-service/src/trpc/router/chat/chat.ts +++ b/packages/host-service/src/trpc/router/chat/chat.ts @@ -8,6 +8,13 @@ const sessionInput = z.object({ workspaceId: z.uuid(), }); +// Slash-command discovery / preview / resolve are workspace-scoped, not +// session-scoped — they only need a workspaceId so they work in fresh +// chats before the first message creates a session. +const workspaceSlashInput = z.object({ + workspaceId: z.uuid(), +}); + const sendMessagePayloadSchema = z.object({ content: z.string(), files: z @@ -41,6 +48,12 @@ export const chatRouter = router({ return ctx.runtime.chat.listMessages(input); }), + getSnapshot: protectedProcedure + .input(sessionInput) + .query(({ ctx, input }) => { + return ctx.runtime.chat.getSnapshot(input); + }), + sendMessage: protectedProcedure .input( sessionInput.extend({ @@ -48,8 +61,22 @@ export const chatRouter = router({ metadata: messageMetadataSchema, }), ) - .mutation(({ ctx, input }) => { - return ctx.runtime.chat.sendMessage(input); + .mutation(async ({ ctx, input }) => { + const result = await ctx.runtime.chat.sendMessage(input); + // Fire-and-forget cloud lastActiveAt update so the session selector + // keeps reordering after activity. Failures here must not block the + // turn — the user already sees their message land via the snapshot. 
+ void ctx.api.chat.updateSession + .mutate({ sessionId: input.sessionId, lastActiveAt: new Date() }) + .catch(() => {}); + return result; + }), + + endSession: protectedProcedure + .input(sessionInput) + .mutation(async ({ ctx, input }) => { + await ctx.runtime.chat.disposeRuntime(input.sessionId, input.workspaceId); + return { ok: true }; }), restartFromMessage: protectedProcedure @@ -110,14 +137,14 @@ export const chatRouter = router({ }), getSlashCommands: protectedProcedure - .input(sessionInput) + .input(workspaceSlashInput) .query(({ ctx, input }) => { return ctx.runtime.chat.getSlashCommands(input); }), resolveSlashCommand: protectedProcedure .input( - sessionInput.extend({ + workspaceSlashInput.extend({ text: z.string(), }), ) @@ -127,7 +154,7 @@ export const chatRouter = router({ previewSlashCommand: protectedProcedure .input( - sessionInput.extend({ + workspaceSlashInput.extend({ text: z.string(), }), ) diff --git a/packages/host-service/src/trpc/router/router.ts b/packages/host-service/src/trpc/router/router.ts index a81ada2949e..fc3991cc7d8 100644 --- a/packages/host-service/src/trpc/router/router.ts +++ b/packages/host-service/src/trpc/router/router.ts @@ -1,4 +1,5 @@ import { router } from "../index"; +import { authRouter } from "./auth"; import { chatRouter } from "./chat"; import { cloudRouter } from "./cloud"; import { filesystemRouter } from "./filesystem"; @@ -16,6 +17,7 @@ import { workspaceCleanupRouter } from "./workspace-cleanup"; import { workspaceCreationRouter } from "./workspace-creation"; export const appRouter = router({ + auth: authRouter, health: healthRouter, host: hostRouter, chat: chatRouter, diff --git a/packages/host-service/src/types.ts b/packages/host-service/src/types.ts index 148b1c696b0..cfd5adb3bb9 100644 --- a/packages/host-service/src/types.ts +++ b/packages/host-service/src/types.ts @@ -1,4 +1,5 @@ import type { Octokit } from "@octokit/rest"; +import type { ChatService } from "@superset/chat/server/desktop"; import 
type { AppRouter } from "@superset/trpc"; import type { TRPCClient } from "@trpc/client"; import type { HostDb } from "./db"; @@ -11,6 +12,7 @@ import type { PullRequestRuntimeManager } from "./runtime/pull-requests"; export type ApiClient = TRPCClient; export interface HostServiceRuntime { + auth: ChatService; chat: ChatRuntimeManager; filesystem: WorkspaceFilesystemManager; pullRequests: PullRequestRuntimeManager; diff --git a/packages/trpc/src/router/chat/chat.ts b/packages/trpc/src/router/chat/chat.ts index b1efb5243c4..0ffeaf658e9 100644 --- a/packages/trpc/src/router/chat/chat.ts +++ b/packages/trpc/src/router/chat/chat.ts @@ -93,6 +93,7 @@ export const chatRouter = { z.object({ sessionId: z.uuid(), title: z.string().optional(), + lastActiveAt: z.date().optional(), }), ) .mutation(async ({ ctx, input }) => { @@ -109,6 +110,9 @@ export const chatRouter = { if (input.title !== undefined) { updates.title = input.title; } + if (input.lastActiveAt !== undefined) { + updates.lastActiveAt = input.lastActiveAt; + } if (Object.keys(updates).length === 0) { return { updated: false }; diff --git a/plans/v1-to-v2-fast-migration.md b/plans/v1-to-v2-fast-migration.md new file mode 100644 index 00000000000..e6bbfaeb644 --- /dev/null +++ b/plans/v1-to-v2-fast-migration.md @@ -0,0 +1,384 @@ +# V1 -> V2 Fast Migration Plan + +A pragmatic plan to ship the v1 chat UX on top of the existing v2 host-service chat architecture. This is **not** the full event-log rearchitect from `v2-chat-greenfield-architecture.md`, and it should **not replace** the host-service chat work that already exists. The current host-service implementation is the foundation; this plan updates the remaining migration work around it. 
+ +## TL;DR + +- **Keep** the existing host-service chat implementation: + - `packages/host-service/src/runtime/chat/chat.ts` + - `packages/host-service/src/trpc/router/chat/chat.ts` + - `packages/host-service/src/providers/model-providers/` + - `packages/workspace-client` +- **Keep** v1 client UX code where possible: `ChatPane`, `ChatPaneInterface`, `useChatPaneController`, `useChatDisplay`, composer, approval/question dialogs, model picker, MCP UI. +- **Do not move canonical session metadata into host-service.** Cloud remains the owner of `chat_sessions`; host-service owns local runtime execution. +- **Collapse** the dual-poll race with `getSnapshot()` on the existing host-service chat router, then wire clients to consume that snapshot. +- **Add a compatibility/adaptation layer** between v1's `chatRuntimeServiceTrpc.session.*` shape and host-service's `workspaceTrpc.chat.*` shape instead of rewriting host-service around v1. +- **Ship behind a per-workspace flag** so the old Electron IPC chat runtime remains a rollback path during bake. + +Scope: roughly 1-2 weeks of implementation plus bake time, assuming the existing host-service chat runtime stays in place and the migration focuses on parity, adapter wiring, and rollout. + +### Fixes at a glance + +Quick scan of every concrete fix in this plan, ordered by priority. Each links to its phase below. 
+ +| # | Severity | Fix | Phase | +|---|---|---|---| +| 1 | HIGH | Runtime disposal on session delete (no leak) | P0 Fix #1 | +| 2 | HIGH | Cross-workspace `sessionId` race in runtime creation | P0 Fix #2 | +| 3 | MEDIUM | Collapse `getDisplayState` + `listMessages` into single `getSnapshot` | P1 | +| 4 | MEDIUM | Drop `fps: 60` polling override at `ChatPaneInterface.tsx:287` | P1 | +| 5 | MEDIUM | Update cloud `lastActiveAt` after host send (selector ordering) | P1 | +| 6 | MEDIUM | Implement slash command resolution (currently stubs) | P4 | +| 7 | MEDIUM | Add `searchFiles` for `@file` mention autocomplete (missing entirely) | P4 | +| 8 | MEDIUM | Wire `SessionStart` / `SessionEnd` / `UserPromptSubmit` hooks (Stop / Notification hooks deferred) | P4 | +| 9 | MEDIUM | Wire title generation via cloud `chat.updateTitle` | P4 | +| 10 | MEDIUM | Decide Superset MCP tools strategy (defer or port) | P4 | +| 11 | MEDIUM | Decide MCP overview / auth strategy (defer or port) | P4 | +| 12 | LOW | Real model-provider auth state (no hardcoded `isAnthropicAuthenticated = true`) | P4 | +| 13 | LOW | Optional: validate `(sessionId, workspaceId)` against cloud at runtime create | P0 (decision) | +| 14 | LOW | Mastra memory store guard in `restartFromMessage` | Lower-Risk Notes | +| 15 | LOW | Comment on `process.env` mutation in `applyRuntimeEnv` | Lower-Risk Notes | +| 16 | LOW | Confirm `protectedProcedure` end-to-end | Lower-Risk Notes | + +P0 (HIGH) lands first as the prerequisite. P1 and P4 are independent of each other and can run in parallel after P0. P2-P3 (adapter, bootstrap migration) and P5-P6 (rollout, deletion) wrap around them. + +## Current Host-Service Chat State + +The host-service chat path already exists and should be preserved. + +| Area | Current implementation | Notes | +|---|---|---| +| Runtime owner | `packages/host-service/src/runtime/chat/chat.ts` | `ChatRuntimeManager` owns in-memory `RuntimeSession` instances keyed by `sessionId`. 
It resolves `workspaceId -> worktreePath`, creates Mastracode runtimes, and exposes chat runtime methods. | +| Router | `packages/host-service/src/trpc/router/chat/chat.ts` | Mounted as `chat` in `packages/host-service/src/trpc/router/router.ts`. Uses host-service auth and calls `ctx.runtime.chat.*`. | +| App wiring | `packages/host-service/src/app.ts` | Creates `ChatRuntimeManager` and mounts it as `runtime.chat`. | +| Model provider bridge | `packages/host-service/src/providers/model-providers/` | `LocalModelProvider` and `CloudModelProvider` implement `ModelProviderRuntimeResolver` for runtime env preparation. | +| Renderer client | `packages/workspace-client` | `workspaceTrpc` talks to host-service over local HTTP. The v2 workspace route already uses this path. | +| Existing v2 consumer | `apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/.../ChatPane` | Calls `workspaceTrpc.chat.*` directly with `{ sessionId, workspaceId }`. | +| Cloud session metadata | `packages/trpc/src/router/chat/chat.ts` | Cloud tRPC already has `createSession`, `deleteSession`, `updateTitle`, `uploadAttachment`, and `getModels`. | +| Legacy v1 runtime path | `packages/chat/src/server/trpc/service.ts` + Electron IPC router | Still powers the old `ChatPane` path today. | + +The migration should converge the old v1 pane onto this host-service path without discarding the host-service runtime/router. + +## Goals + +1. **Host-service remains the single owner of local chat runtime execution.** +2. **Preserve v1 UX parity** while swapping the runtime transport under it. +3. **Close the dual-poll race** with a single snapshot query on host-service and the legacy IPC path during migration. +4. **Keep cloud as the canonical session metadata owner** for `chat_sessions`, titles, attachments, and models. +5. **Keep rollback simple** by routing per workspace through either the existing Electron IPC runtime or host-service. +6. 
**Avoid blocking greenfield work.** This plan should make the host-service boundary stable so event-log work can build on top of it. + +## Non-Goals + +- Not replacing or rewriting the existing host-service chat runtime. +- Not moving canonical `chat_sessions` ownership from cloud Postgres into host-service. +- Not introducing the event log, sequence numbers, gap detection, or a durable local chat store. +- Not solving multi-device ownership or session-host affinity end-to-end. +- Not removing the v2 workspace chat path. That path is the proof point for host-service chat and should continue to improve. +- Not making provider credential handling elegant in this migration. It only needs to preserve current working behavior and leave the cleaner abstraction for follow-up. + +## Implementation Audit + +Detailed walk of the existing host-service chat code, what's load-bearing, what's stubbed, and what's actually broken. The migration phases below reference this section. + +### What's solid (don't rewrite) + +1. **Mastra harness lifecycle** in `ChatRuntimeManager` — `init()` → `setResourceId()` → `selectOrCreateThread()` → event subscription. The structure is right. +2. **Concurrent-creation guard** via `runtimeCreations` map (lines 316, 442-450) prevents two requests for the same session from both spinning up runtimes — but see [Bug #2](#verified-bugs) below for a real defect in the keying. +3. **Error normalization** (lines 189-228) strips `AI_APICallError` prefix and extracts nested error messages. UX-load-bearing, easy to break, leave alone. +4. **Workspace DB resolution** at create time (line 392) — `workspaceId` → `worktreePath` lookup is cleaner than v1's `cwd` passthrough. +5. **Restart-from-message** (lines 247-310) uses Mastra's memory store correctly to clone the thread and re-send from a target message. +6. **AGENTS.md injection** (lines 359-381) only writes if missing or previously written by Superset — safe re-entrance. +7. 
**Model provider abstraction** (`CloudModelProvider` / `LocalModelProvider`) gates runtime creation on `hasUsableRuntimeEnv()` and tracks env keys for cleanup. Right shape. + +### Stubbed in `ChatRuntimeManager` (lines 594-635) + +```ts +getSlashCommands() → [] // TODO +resolveSlashCommand() → { handled: false } // TODO +previewSlashCommand() → { handled: false } // TODO +getMcpOverview() → { sourcePath: null, servers: [] } // TODO +``` + +The router exposes these procedures and the v2 ChatPane renders the surfaces, so users see slash menus and MCP affordances that don't actually work. + +### Missing from the router entirely + +- **`searchFiles`** — v1 had `workspace.searchFiles` (delegated to `@superset/workspace-fs/host`). Without it, `@file` mention autocomplete is dead. +- **`authenticateMcpServer`** — v1 had OAuth callback for new MCP servers. With MCP currently stubbed anyway, this is downstream of `getMcpOverview`. + +### Behaviors v1 runs that host-service runtime doesn't + +| Behavior | v1 location | Host-service status | +|---|---|---| +| `runSessionStartHook()` after init | `packages/chat/src/server/trpc/utils/runtime/runtime.ts:130` | Not called. Host only sets hook session id at `chat.ts:408`. | +| `runSessionEnd()` on teardown | v1 hook manager | Not called. Also no teardown path exists. | +| `onUserPromptSubmit()` before send | v1 hook manager | Not called. | +| `getSupersetMcpTools()` loaded | v1 `service.ts:113-116` | Not loaded. | +| `generateAndSetTitle()` after first / 10th send | v1 `runtime.ts:457`, `service.ts:281` | Not called. | +| `subscribeToSessionEvents` with `onLifecycleEvent` callback | v1 | Only error / sandbox events surfaced; lifecycle callback not exposed. | +| `mcpManualStatuses` per-runtime tracking | v1 | Not present. 
| + +### Contract differences (the adapter layer) + +| Concern | v1 | Host-service | +|---|---|---| +| Session id input | `{ sessionId, cwd }` | `{ sessionId, workspaceId }` | +| Namespace | `session.*` + `workspace.*` (split) | `chat.*` (flat) | +| Approval reply | `session.approval.respond` | `chat.respondToApproval` | +| Question reply | `session.question.respond` | `chat.respondToQuestion` | +| Plan reply | `session.plan.respond` | `chat.respondToPlan` | +| File search | `workspace.searchFiles` | _missing_ | +| MCP auth | `workspace.authenticateMcpServer` | _missing_ | + +The adapter layer is small but real: ~12 procedure renames, 2 missing procedures, payload-shape passthrough, and `cwd` → `workspaceId` resolution. + +### Verified bugs + +These are real defects in the current code, verified by reading the source. Listed in priority order. + +1. **Runtime leak on session delete (HIGH).** `useWorkspaceChatController.ts:105` calls cloud `deleteSession` after a confirmation, but the host-service runtime in `ChatRuntimeManager.runtimes` (chat.ts:315) has no dispose path. The router has no `endSession` / `disposeRuntime` procedure (chat.ts:31). Each abandoned session leaks a `RuntimeSession` for the lifetime of the host-service process. **Fix:** add `chat.endSession({ sessionId, workspaceId })` mutation that calls a new `ChatRuntimeManager.disposeRuntime(sessionId)`, run any session-end hooks, then drop from the map. Wire the call after cloud `deleteSession` succeeds. Also wire it on workspace deletion. + +2. **Cross-workspace sessionId race in runtime creation (HIGH).** `runtimeCreations` (chat.ts:316) is keyed by `sessionId` only. The check that an existing runtime's `workspaceId` matches the request (line 436) runs only on the *already-created* path, not on the *in-flight* path. 
So if creation for `(sessionId=X, workspaceId=A)` is mid-flight and a second request arrives for `(sessionId=X, workspaceId=B)`, the second request awaits the in-flight promise and receives a runtime bound to `workspaceId=A`. **Fix:** key the map by `${sessionId}:${workspaceId}`, or store the workspaceId on the in-flight promise and reject mismatches at line 442. Easy to fix, real bug under any concurrent-mount scenario (e.g., session opened in two windows).
+
+3. **v2 ChatPane polls at 60 fps (MEDIUM).** `WorkspaceChatInterface/ChatPaneInterface.tsx:287` passes `fps: 60` to `useWorkspaceChatDisplay`, which clamps the refetch interval to ~16 ms (`useWorkspaceChatDisplay.ts:14-16`). Combined with the still-separate `getDisplayState` + `listMessages` queries, that's ~120 RPCs per second per active chat pane. **Fix:** the `getSnapshot` collapse from §The Race Fix kills both birds — single query, sane cadence (4 fps matches v1 default).
+
+4. **Cloud `lastActiveAt` not updated on host send (MEDIUM).** `useWorkspaceChatController.ts:81` sorts the session selector by `lastActiveAt`. Host `sendMessage` (chat.ts:509) goes straight to the harness and never pings cloud. The cloud `chat_sessions.lastActiveAt` (`packages/trpc/src/router/chat/chat.ts:80`) only updates on metadata mutations. **Fix:** after a successful host send, host-service calls cloud `chat.updateSession({ lastActiveAt: now })` via its API client. Or: the v2 client fires a fire-and-forget cloud update alongside the host send. Either works; second is simpler.
+
+5. **`sessionId` ↔ `workspaceId` not validated against cloud (MEDIUM).** Host-service trusts authenticated local callers to pair any `sessionId` with any local `workspaceId`. It validates only that the local workspace row exists (chat.ts:391). The PSK limits exposure, but a stronger binding would validate against cloud `chat_sessions.v2WorkspaceId` either at session-create time or on first runtime creation.
**Fix:** either (a) one-time validation at runtime creation that checks cloud `chat_sessions.v2WorkspaceId === workspaceId`, or (b) require cloud to issue a short-lived binding token that host accepts. (a) is enough for this migration.
+
+6. **No host-service chat-specific tests.** Mastra harness behavior is exercised through other layers but the runtime manager has no targeted coverage for workspace binding, runtime reuse, snapshot consistency, or the bugs above. **Fix:** add tests as part of the corresponding fixes.
+
+## The Race Fix
+
+The highest-value behavior fix is still collapsing:
+
+```ts
+getDisplayState()
+listMessages()
+```
+
+into:
+
+```ts
+chat.getSnapshot(input) -> {
+  displayState: ChatDisplayState
+  messages: Message[]
+}
+```
+
+Intentionally no `observedAt` timestamp in the response: when the harness state hasn't changed between polls, the payload stays structurally identical, so React Query's structural sharing preserves object identity and idle polls don't trigger downstream rerenders.
+
+On host-service, implement this on top of the existing `ChatRuntimeManager`. It should read `displayState` and `messages` inside one router procedure and return one response. Because `listMessages()` is async, this is best described as a **single server-side observation**, not a fully locked atomic snapshot. It still removes the client-side two-query race that causes mismatched message/display state.
+
+During migration, add the same procedure to the legacy Electron IPC runtime router so old-path users get the same client behavior.
+ +## Ownership Model + +| Concern | Owner during this migration | +|---|---| +| Local runtime execution | Host-service `ChatRuntimeManager` | +| Workspace cwd resolution | Host-service via `workspaceId` | +| Runtime credentials/env prep | Host-service model-provider resolver | +| Canonical chat session rows | Cloud tRPC / API | +| Session titles | Cloud tRPC `chat.updateTitle`, triggered by runtime owner when parity is restored | +| Attachments | Cloud tRPC `chat.uploadAttachment` | +| Old durable stream compatibility | Existing API routes until explicitly retired | +| Renderer UX | Existing v1 chat UI, adapted to host-service transport | + +## Phased Migration + +Each phase should be a separate PR or small PR stack. + +### P0 - Critical Bug Fixes (Verified Bugs #1, #2) + +**Goal:** close the two HIGH-severity defects in the existing host-service chat runtime before any rollout work. Both are surgical changes (≤50 lines each) and unblock everything else. + +#### Fix #1 — Runtime disposal on session delete + +References Verified Bug #1 in §Implementation Audit. + +- [ ] Add `disposeRuntime(sessionId): Promise` to `ChatRuntimeManager` (`packages/host-service/src/runtime/chat/chat.ts`): + - [ ] Look up `RuntimeSession` by `sessionId`. + - [ ] If present, run any session-end hook (placeholder ok if hook wiring lands later). + - [ ] Call `harness.abort()` and any `harness.destroy()` / cleanup the harness exposes. + - [ ] Delete from `runtimes` map. + - [ ] Idempotent — disposing an unknown session id is a no-op. +- [ ] Add `chat.endSession({ sessionId, workspaceId })` mutation to `packages/host-service/src/trpc/router/chat/chat.ts`. +- [ ] Wire client call sites: + - [ ] `useWorkspaceChatController.ts:105` (after cloud `deleteSession` succeeds). + - [ ] Workspace deletion flow — when a workspace is deleted, dispose all runtimes for sessions bound to it. 
+- [ ] Test: dispose then re-send to the same `sessionId` creates a fresh runtime; the map size returns to baseline after dispose.
+
+#### Fix #2 — Cross-workspace sessionId race in runtime creation
+
+References Verified Bug #2 in §Implementation Audit.
+
+- [ ] In `ChatRuntimeManager` (`chat.ts:316`), change `runtimeCreations` keying from `string` (sessionId) to `${sessionId}:${workspaceId}` — OR — keep the sessionId key and store `{ workspaceId, promise }` so awaiting code can validate the workspace match before returning.
+- [ ] Apply the same workspace-mismatch guard that exists for already-created runtimes (`chat.ts:436`) to the in-flight path (`chat.ts:442`). A request whose `workspaceId` does not match the in-flight creation's workspace must throw, not silently get the wrong runtime.
+- [ ] Test: concurrent calls with `(sessionId=X, workspaceId=A)` and `(sessionId=X, workspaceId=B)` resolve to two distinct runtimes (or one rejects with a clear "session bound to other workspace" error). Today's behavior silently shares the in-flight promise.
+
+#### General hardening
+
+- [ ] Add lightweight tests for the existing happy paths so regressions don't sneak in alongside the bug fixes:
+  - [ ] workspace-bound runtime creation
+  - [ ] same `sessionId` reused in same workspace returns the same runtime
+  - [ ] router procedures call the runtime manager with `{ sessionId, workspaceId }`
+
+**Acceptance:** runtime leaks are gone, cross-workspace race cannot happen, basic test coverage exists for the manager.
+
+### P1 - Add `getSnapshot` And Fix Polling Cadence (Verified Bugs #3, #4)
+
+**Goal:** one query per poll cycle, sane cadence, and host-side cloud `lastActiveAt` updates so the session selector keeps reordering correctly.
+
+#### Snapshot procedure
+
+- [ ] Add `workspaceTrpc.chat.getSnapshot({ sessionId, workspaceId })` to host-service. Returns `{ displayState, messages }` from a single handler invocation (no `observedAt` — a per-poll timestamp would defeat React Query's structural sharing on idle polls).
+- [ ] Add legacy `chatRuntimeServiceTrpc.session.getSnapshot` to the Electron IPC runtime path with the same shape. +- [ ] Implementation note: read `displayState` and `listMessages()` inside one router function; one server-side observation, not a fully locked atomic snapshot. Document this in code. + +#### Client cutover + +- [ ] Update host-service-backed v2 chat display (`useWorkspaceChatDisplay`) to consume `getSnapshot`. +- [ ] Update shared/v1 `useChatDisplay` to consume legacy `getSnapshot`. +- [ ] Update optimistic-message cache writes to target the snapshot cache, or invalidate/refetch the snapshot after cross-session sends. +- [ ] Keep `getDisplayState` and `listMessages` alive on both surfaces until every caller is migrated; delete in P6. + +#### Polling cadence (Verified Bug #3) + +- [ ] Drop the `fps: 60` parameter at `ChatPaneInterface.tsx:287`. Default in `useWorkspaceChatDisplay` is `fps: 4`, which matches v1 and is the right cadence for a polled chat. 60 fps means ~120 RPCs/sec per active pane today. +- [ ] Confirm there are no other call sites passing high `fps`. Grep `useWorkspaceChatDisplay` callers; flag any non-default `fps` for review. + +#### Cloud `lastActiveAt` update on host send (Verified Bug #4) + +- [ ] After a successful host `sendMessage`, update cloud `chat_sessions.lastActiveAt` so the session selector (`useWorkspaceChatController.ts:81`) keeps reordering after activity. Two viable shapes: + - [ ] **Host-side**: host-service's API client calls cloud `chat.updateSession({ sessionId, lastActiveAt: now })` after a successful send. Single source of truth, no extra client code. + - [ ] **Client-side**: v2 client fires a fire-and-forget `apiTrpcClient.chat.updateSession` alongside the host send. +- [ ] Pick one (recommend host-side) and implement. Verify selector reorders after a send. 
+ +**Acceptance:** client chat display uses one polling query on both old and host-service paths, default polling is 4 fps, sending a message reorders its session to the top of the selector. + +### P2 - Add V1 Compatibility Adapter For Host-Service + +**Goal:** allow the old `ChatPane` UX to talk to host-service without reshaping host-service around the v1 router. + +- [ ] Add a client-side adapter or provider resolver that exposes the v1 command surface while internally calling `workspaceTrpc.chat.*`. +- [ ] Map v1 `{ sessionId, cwd }` inputs to host-service `{ sessionId, workspaceId }` inputs at the renderer boundary. +- [ ] Keep v1 UI components unchanged where possible. +- [ ] Ensure `sendMessage`, `restartFromMessage`, `stop`, approvals, questions, plans, and snapshot reads all route through the adapter. +- [ ] Add a per-workspace flag to choose Electron IPC runtime vs host-service runtime. +- [ ] Add a dev-only backend indicator for QA. + +**Acceptance:** flipping the flag for a workspace switches v1 chat runtime traffic to host-service with the same visible UI. + +### P3 - Move V1 Session Bootstrap Off REST, But Keep It Cloud-Owned + +**Goal:** stop `useChatPaneController` from calling `/api/chat/[sessionId]` directly while keeping canonical metadata in cloud. + +- [ ] Replace v1 `fetch('/api/chat/:sessionId')` session create/delete calls with cloud tRPC `apiTrpcClient.chat.createSession` / `deleteSession`. +- [ ] If v1 workspaces still need the legacy `workspaceId` column instead of `v2WorkspaceId`, extend cloud tRPC carefully rather than moving this concern to host-service. +- [ ] Keep REST routes alive for one release as compatibility/fallback because they also manage durable-stream behavior. +- [ ] Preserve `createSessionInitRunner` retry/toast/reporting behavior. +- [ ] Verify session listing still flows through Electric `chatSessions` collections. 
+ +**Acceptance:** fresh v1 clients no longer call the REST session bootstrap routes, but cloud remains the session metadata owner. + +### P4 - Fill Host-Service Parity Gaps + +**Goal:** make the host-service path match v1 behavior closely enough for canary. References the gap list in §Implementation Audit. + +#### Slash commands (currently stubs at `chat.ts:594-635`) + +- [ ] Port slash-command discovery/resolution from `packages/chat/src/server/desktop/slash-commands/` to host-service. +- [ ] Implement `getSlashCommands` so it returns project + global commands (instead of `[]`). +- [ ] Implement `resolveSlashCommand` and `previewSlashCommand` so prompts substitute correctly (instead of `{ handled: false }`). +- [ ] Verify project-scoped (`.claude/commands`, `.agents/commands`) and global (`~/.claude/commands`) sources both resolve. + +#### File mention search (missing from router entirely) + +- [ ] Add `chat.searchFiles({ workspaceId, query, ... })` procedure to host-service. +- [ ] Wire to `@superset/workspace-fs/host` (already used elsewhere). Match v1's `workspace.searchFiles` shape so the renderer adapter is trivial. +- [ ] Verify `@file` mention autocomplete works in the host-service-backed chat pane. + +#### Session lifecycle + user-prompt hooks (currently uncalled) + +Scope: `SessionStart`, `SessionEnd`, `UserPromptSubmit` only. **`Stop` and `Notification` hook events are intentionally deferred** — they aren't blocking for canary, they overlap with agent-status UI plumbing we're not chasing in this migration. + +- [ ] In `ChatRuntimeManager.createRuntime` (after `setResourceId`, around line 408): call `runSessionStartHook()` analogous to v1 `runtime.ts:130`. +- [ ] In `ChatRuntimeManager.disposeRuntime` (added in P0 Fix #1): call `runSessionEnd()` before tearing down. +- [ ] In `ChatRuntimeManager.sendMessage` (line 509): call `onUserPromptSubmit()` before delegating to harness; respect a "blocked" return. 
+- [ ] Reload hook config on session re-access (matches v1 `reloadHookConfig`). +- [ ] Verify a user-defined `.claude/*.hooks.ts` `SessionStart` / `UserPromptSubmit` / `SessionEnd` hook actually fires. + +#### Title generation (currently not called) + +- [ ] Wire `generateAndSetTitle()` after the first user message and every 10th message — analogous to v1 `runtime.ts:457` and `service.ts:281`. +- [ ] Persist via cloud tRPC `chat.updateTitle({ sessionId, title })` so titles survive across devices. + +#### Superset MCP tools (currently not loaded) + +- [ ] Decide product policy: do host-service-backed chat sessions get Superset's built-in MCP tools (analytics queries etc.), or only user-configured MCP? +- [ ] If yes: load `getSupersetMcpTools()` analogous to v1 `service.ts:113-116` during runtime creation. +- [ ] If no: explicitly note in code so the gap isn't accidentally re-opened. + +#### MCP overview / authentication (currently stubbed) + +- [ ] Decide the MCP strategy for canary: + - [ ] **Defer**: keep `getMcpOverview` returning empty and hide/limit the MCP UI surfaces in v2-workspace ChatPane so users don't see broken affordances. v1 already shipped with `ENABLE_MASTRA_MCP_SERVERS = false`, so this is a credible default. + - [ ] **Port**: implement `getRuntimeMcpOverview()` and `authenticateRuntimeMcpServer()` on host-service equivalents. +- [ ] If deferring: track Mastra MCP enable as separate follow-up. + +#### Lifecycle event forwarding — deferred + +`subscribeToSessionEvents` `onLifecycleEvent` callbacks (agent start/stop, permission request notifications, etc.) are out of scope. Polling `getSnapshot` already covers what the UI needs for canary; push-style lifecycle notifications belong with the event-log work in `v2-chat-greenfield-architecture.md`. + +#### Model-provider auth / status UI + +- [ ] Verify the model picker doesn't claim a provider is authenticated when host-service can't actually run it. 
Today some places hardcode `isAnthropicAuthenticated = true`. Plumb the real auth state through `LocalModelProvider.hasUsableRuntimeEnv()`. + +**Acceptance:** known v1 behaviors either work on host-service or have an explicit product decision to defer (with the deferred ones surfacing no broken UI). + +### P5 - Canary And Rollout + +**Goal:** ship host-service-backed chat safely. + +- [ ] Dogfood host-service chat for developer workspaces. +- [ ] Canary a small percentage of real workspaces. +- [ ] Monitor chat error rate, runtime creation failures, provider credential failures, and Sentry. +- [ ] Keep rollback as a flag flip back to Electron IPC. +- [ ] Bake for at least one release before deleting legacy paths. + +**Acceptance:** host-service chat handles the majority of canary traffic without elevated errors or parity regressions. + +### P6 - Delete Legacy Runtime Paths + +**Goal:** one runtime owner. + +- [ ] Delete the Electron-main `chatRuntimeService` runtime router after the host-service path is default-on and stable. +- [ ] Delete legacy dual-query procedures after every caller uses `getSnapshot`. +- [ ] Delete the client adapter/flag once host-service is the only target. +- [ ] Revisit `/api/chat/[sessionId]` and durable-stream routes separately. Delete them only after confirming no remaining durable-stream consumers. +- [ ] Update docs to point to host-service chat as the runtime owner. + +**Acceptance:** runtime chat traffic goes only through host-service; cloud still owns session metadata unless a separate migration changes that. + +## Lower-Risk Notes + +Items that aren't outright bugs but should be verified during the migration. The Verified Bugs in §Implementation Audit are the load-bearing ones; these are the next tier. + +1. **Provider credentials parity.** `LocalModelProvider` reads keychain + mastracode auth storage + `~/.mastracode` config. 
Verify it covers every credential source the legacy desktop chat service supports (managed env config, backup slots, OAuth refresh) before flipping the flag for users with non-standard auth setups. +2. **Provider env mutation.** `applyRuntimeEnv()` mutates `process.env` globally. Concurrent runtimes for different model providers could in theory race on env keys. One provider per host-service install today, so probably fine in practice — but worth a comment in the code so a future contributor doesn't re-trip on it. +3. **Mastra memory-store assumption** in `restartFromMessage` (lines 230-245). Throws cryptically if storage isn't configured. Add a guard with a clearer error. +4. **`protectedProcedure` end-to-end check.** Confirm the chat router's `protectedProcedure` actually validates auth and that `ctx.organizationId` is populated where expected. The audit didn't trace this fully. +5. **Snapshot semantics communication.** `getSnapshot` is a single server-side observation, not an event-log atomic snapshot. Good enough for this migration; document that explicitly so anyone reading later doesn't oversell it as final consistency. + +## Relationship To Greenfield Plan + +This migration stabilizes the host-service runtime boundary that `v2-chat-greenfield-architecture.md` wants to build on. After this lands: + +- the event-log work can attach to host-service `ChatRuntimeManager`; +- `getSnapshot` becomes a temporary bridge until subscriptions/event-log reads replace polling; +- legacy Electron IPC runtime ownership can be deleted without redoing the host-service migration. + +## Summary + +Keep the existing host-service chat runtime and router. Add snapshot reads, a v1 compatibility adapter, cloud-owned session bootstrap cleanup, parity work, and a flag-based rollout. The old plan was written as if host-service chat still needed to be created; this version treats it as already present and worth preserving. 
diff --git a/plans/v2-chat-greenfield-architecture.md b/plans/v2-chat-greenfield-architecture.md new file mode 100644 index 00000000000..24d4c1dfa72 --- /dev/null +++ b/plans/v2-chat-greenfield-architecture.md @@ -0,0 +1,712 @@ +# V2 Chat — Greenfield Architecture Proposal + +Proposed transport + state architecture for v2 chat. Builds on `host-service-chat-architecture.md` and `chat-mastra-rebuild-execplan.md`, and takes specific patterns from `t3code-chat-architecture-reference.md`, `opencode-electron-chat-architecture-reference.md`, and `background-agents-chat-architecture-reference.md`. Starting point is the current v2 chat in `packages/chat` + `packages/host-service`, which polls `getDisplayState()` and `listMessages()` at 4 fps from two independent harness sources — that's the thing this proposal replaces. + +## Goals + +1. **Kill the polling race.** Single server-side source of truth per session; client reducer applies events in order. +2. **Keep the wire protocol stable across runtime locations.** Same `ChatEvent` shape whether the runtime is the user's laptop host-service or a cloud worker spun up for handoff. +3. **Support multi-device + multi-client on the same session.** A session opened on desktop, web, and mobile simultaneously must converge to identical state. +4. **Enable device handoff at turn boundaries.** Close laptop → continue on phone → return to laptop, with a cloud worker picking up in between. Mid-turn handoff is explicitly out of scope (see §P7). +5. **Host-service keeps owning the agent runtime and filesystem.** No change to the `host-service-chat-architecture.md` direction of travel — this is the *transport + state* layer that sits above it. (In the P5b DO path, host-service's authority scopes *down* slightly, see §Cloud-backed EventLog.) +6. **Reuse what we have.** tRPC everywhere, `@hono/node-ws` already in host-service, Zustand already in the dep tree. + +## Non-goals + +- Not migrating off tRPC. 
t3code's Effect-RPC is nice but the wire shape is what matters; we can get 90% of the value with tRPC subscriptions. +- Not replacing Mastracode. The harness stays — we wrap its event subscription, we don't rewrite the agent loop. +- Not event-sourcing the whole database. The *transport* is event-driven; persistence strategy is a separate decision (see §Persistence). +- Not inventing a new message shape. We already use Vercel AI SDK v6's `UIMessage` and its part types (`TextUIPart`, `ReasoningUIPart`, `ToolUIPart`, `FileUIPart`) throughout the chat UI — `ai-elements` renders them directly. We keep that as the canonical message shape. +- Not adopting `useChat` from `@ai-sdk/react`. It's built for client-initiated single-subscriber request/response. Our model is multi-subscriber, event-driven, with replay and approvals — outside its vocabulary. + +## Recommended architecture + +```mermaid +flowchart LR + subgraph Client["Client (desktop renderer · web · mobile)"] + direction TB + UI["React components"] + Store["Zustand session store
Record<id, UIMessage> · currentTurn ·
pendingApprovals · status"] + Reducer["applyEvent(state, event)"] + Recovery["gap detector
latestSeq · highestSeen"] + UI --> Store + Reducer --> Store + Recovery -.-> Store + end + + subgraph Transport["tRPC over WebSocket (@hono/node-ws)"] + direction TB + Cmd["mutations:
sendMessage · answerApproval ·
answerQuestion · interrupt"] + Replay["query: replayEvents(sessionId, fromSeq, toSeq)"] + Shell["subscription: workspace.watch"] + Detail["subscription: session.watch(sessionId)"] + end + + subgraph HostService["Host service (local) OR Cloud worker"] + direction TB + Router["chatRouter"] + Bridge["EventBridge
sequences harness events +
user commands, synthesizes
user_message_submitted"] + Dedup["CommandReceipts
(idempotent by commandId)"] + Log[("EventLog abstraction
append · readFrom · subscribe
seq-numbered · per-session")] + Shim["LocalEventLog (SQLite) today
— OR —
PostgresEventLog (P5a)
OR DO-native (P5b)"] + Harness["Mastracode harness
(unchanged)"] + Router --> Dedup + Router --> Bridge + Bridge --> Log + Log --> Shim + Bridge --> Harness + Harness --> Bridge + end + + Store -->|mutations| Cmd + Cmd --> Router + Recovery -->|on gap| Replay + Replay --> Log + Shell --> Store + Detail --> Store + Log -.->|fan-out| Shell + Log -.->|fan-out| Detail +``` + +The five load-bearing ideas, each earned from the reference docs: + +1. **Event log as the single source of truth.** Per-session, append-only, monotonically numbered. Both `getDisplayState()` and `listMessages()` become *projections of the log*, not independent queries. — from t3code. +2. **Append-style streaming deltas.** `{ messageId, partIndex, field, delta }` → client applies `messages[id].parts[i][field] += delta`. No unified diffs, no token objects. — from opencode. +3. **Dual subscription scope.** One workspace-wide stream for session summaries (sidebar), one per-session stream for message content. Each region of client state is written by exactly one stream. — from t3code. +4. **Command IDs + server dedup.** Mutations are idempotent by `commandId`; retries on reconnect don't duplicate effects. — from t3code. +5. **Gap detection + `replayEvents` RPC.** Client tracks `latestSeq` / `highestSeen`; on gap, fetch the slice. Non-negotiable for multi-device. — from t3code. + +And the four load-bearing things *we* add: + +6. **`EventLog` as an abstract interface** — swappable backend: local ring buffer + SQLite snapshot today, `s2.dev` durable stream tomorrow, without changing a line of client or router code. +7. **Writes stay as tRPC mutations.** No subscription-based commands. Everything the user does is a regular typed mutation that returns fast; its effect shows up as events on the subscription. +8. **Single session reducer per open session** (not a monolithic global store). Multiple sessions = multiple stores — keeps memory bounded and reducers small. +9. 
**`UIMessage` on the wire, our reducer on top.** Event payloads carry `UIMessage` and AI SDK part types verbatim. The reducer is ~150 lines of Zustand over a `Record`. `ai-elements` renders the result unchanged. No translation layer between wire and render. + +## Wire protocol + +### Events (server → client) + +Every event conforms to a base envelope, then a discriminated payload. Message and part shapes are AI SDK v6's `UIMessage` and its part union (`TextUIPart | ReasoningUIPart | ToolUIPart | FileUIPart | ...`) — not a custom type. + +```ts +import type { UIMessage } from "ai" +// UIMessage["parts"][number] is the canonical part union from AI SDK v6. +type UIPart = UIMessage["parts"][number] + +type ChatEvent = { + seq: number // monotonic per sessionId, gaps possible after replay + eventId: string + sessionId: SessionId + workspaceId: WorkspaceId + occurredAt: string // ISO + commandId: CommandId | null // null for harness-internal events + causationId: string | null // event that caused this, for tracing +} & EventPayload + +type EventPayload = + // Message lifecycle — uses UIMessage verbatim + | { type: "message.appended"; message: UIMessage } // full message added (user msg, assistant msg shell) + | { type: "message.part.appended"; messageId: UIMessage["id"]; partIndex: number; part: UIPart } // new part on an existing message + | { type: "message.part.delta"; messageId: UIMessage["id"]; partIndex: number; field: "text" | "reasoning"; delta: string } // append into part[field] + | { type: "message.part.updated"; messageId: UIMessage["id"]; partIndex: number; part: UIPart } // replace a part wholesale (tool state transitions, final snapshots) + | { type: "message.completed"; messageId: UIMessage["id"] } + // Turn lifecycle + | { type: "turn.started"; turnId: TurnId } + | { type: "turn.completed"; turnId: TurnId; status: "ok" | "error" | "cancelled" } + // Approvals / questions (out-of-band requests from the agent) + | { type: "approval.requested"; requestId: 
ApprovalId; tool: string; args: unknown } + | { type: "question.requested"; requestId: QuestionId; prompt: string } + // Session status — projection of turn/approval state, exposed for convenience + | { type: "status.changed"; status: "idle" | "running" | "waiting" | "error" } + | { type: "error"; error: ChatError } +``` + +Notes on the AI-SDK-aligned choices: + +- **`messageId` + `partIndex`** — `UIMessage.parts` is an ordered array, so parts are identified by position within a message, matching how `ai-elements` renders them. If we later need a stable `PartId` we can add it in `UIPart.metadata`, but it's not needed for the reducer to work. +- **`message.part.delta` uses `field: "text" | "reasoning"`** — both `TextUIPart` and `ReasoningUIPart` expose a string body that deltas append to. Tool parts (`ToolUIPart`) don't stream via delta — they emit `message.part.updated` events as their `state` transitions (`input-streaming → input-available → output-available | output-error`), which matches AI SDK v6's own tool state machine. +- **`message.appended` carries the full `UIMessage`** — cheap and unambiguous for initial user message insertion or for snapshot replay. Subsequent streaming uses `part.appended` / `part.delta` / `part.updated` to avoid re-sending the whole message. + +`seq` is per-session. Using per-session (not global) because: +- The harness emits per-session already; adding a global counter introduces a cross-session lock. +- Multi-device replay is always scoped to one session anyway. +- Matches s2.dev's per-stream sequence model cleanly. + +Workspace-level events (`session.created`, `session.metadata.changed`, `session.deleted`) ride a *separate* per-workspace log with its own `seq`. Analogous to t3code's shell vs detail split. + +### Commands (client → server, tRPC mutations) + +```ts +chat.session.sendMessage({ commandId, sessionId, text, attachments? 
}) +chat.session.answerApproval({ commandId, sessionId, requestId, reply: "accept" | "acceptForSession" | "decline" | "cancel", message? }) +chat.session.answerQuestion({ commandId, sessionId, requestId, answer: string }) +chat.session.interrupt({ commandId, sessionId }) +``` + +Every command carries a client-generated `commandId` (ULID). Server checks `CommandReceipts` before acting — retries after reconnect are free. The returned value is trivial (`{ ok: true, seqAfter }`) so the UI doesn't depend on it; all real state arrives on the subscription. + +### Subscriptions + +```ts +chat.workspace.watch(workspaceId) -> stream of WorkspaceEvent // sidebar +chat.session.watch({ sessionId, sinceSeq? }) -> stream of ChatEvent // open chat +``` + +`sinceSeq` is optional. If omitted, server sends a `snapshot` envelope first (`{ snapshot: ProjectedState, seqAfter: number }`), then live events. If `sinceSeq` is passed and is still in the server's replay window, server streams from there. If it's older than the window, server falls back to snapshot. + +### Replay + +```ts +chat.session.replayEvents({ sessionId, fromSeq, toSeq? }) -> ChatEvent[] +``` + +Called only when the client detects a gap (received `seq = N+2` while holding `latestSeq = N`). Subscription stream carries recent events; this query fills holes. Server implementation just reads the log. + +## Client-side design + +One Zustand store per currently-open session, plus one shared workspace store. Zustand is already the house pattern (direct dep in `packages/panes` and `apps/desktop`, ~20+ existing stores in `apps/desktop/src/renderer/stores/`), so no new primitives. 
+
+Store shape uses `UIMessage` directly so `ai-elements` can render it without translation:
+
+```ts
+import type { UIMessage } from "ai"
+
+interface SessionState {
+  status: "connecting" | "idle" | "running" | "waiting" | "error"
+  messages: Record<UIMessage["id"], UIMessage> // authoritative per-message state
+  messageOrder: UIMessage["id"][] // insertion order for rendering
+  pendingApprovals: Record<ApprovalId, { tool: string; args: unknown }>
+  pendingQuestions: Record<QuestionId, { prompt: string }>
+  currentTurn: { turnId: TurnId; messageId: UIMessage["id"] } | null
+  latestSeq: number
+  highestSeen: number
+  pendingBuffer: ChatEvent[] // events received out of order
+}
+
+const useSessionStore = (sessionId: SessionId) => create<SessionState>((set, get) => ({
+  // ... initial state ...
+
+  applyEvent(event: ChatEvent) {
+    // Pure reducer. Switch on event.type:
+    // message.appended → messages[id] = event.message, messageOrder.push(id)
+    // message.part.appended → messages[id].parts[partIndex] = event.part
+    // message.part.delta → messages[id].parts[partIndex][field] += event.delta
+    // message.part.updated → messages[id].parts[partIndex] = event.part (tool state transitions)
+    // message.completed → (nothing — message already reflects terminal state)
+    // turn.started/.completed → currentTurn = … / status = …
+    // approval.requested / question.requested → add to pendingApprovals / pendingQuestions
+    // status.changed → status = event.status
+  },
+
+  onReceive(event: ChatEvent) {
+    // 1. update highestSeen
+    // 2. if seq == latestSeq + 1: apply, drain pendingBuffer
+    // 3. if seq > latestSeq + 1: push to pendingBuffer, fire chat.session.replayEvents
+    // 4. 
if seq <= latestSeq: ignore (duplicate / reconnect overlap)
+  },
+}))
+```
+
+Selectors replace `useChatDisplay`:
+
+```ts
+const messages = useSessionStore(sessionId, s => s.messageOrder.map(id => s.messages[id])) // UIMessage[]
+const isRunning = useSessionStore(sessionId, s => s.status === "running")
+const pendingApproval = useSessionStore(sessionId, s => firstOf(s.pendingApprovals))
+```
+
+Because `messages` is already `UIMessage[]`, it flows straight into existing `ai-elements` components (`<Conversation>`, `<Message>`, etc.). No adapter layer.
+
+This kills `withoutActiveTurnAssistantHistory` — the active assistant message is just the most-recent entry in `messages` whose parts are still mutating. There's no duplication between `currentMessage` and history to reconcile.
+
+**Per-frame coalescing.** Borrow from opencode: if `message.part.delta` events arrive faster than the browser can render, batch them and flush once per `requestAnimationFrame`. For non-delta events, apply immediately. Straight in the `onReceive` path, not in the reducer.
+
+**Why not `useChat` from `@ai-sdk/react`?** `useChat` holds `UIMessage[]` state and knows how to apply text deltas, but its mental model is "this client initiated this turn, this client owns the stream." Turns in our system can originate from another device; events arrive for approvals/questions/interrupts that `useChat` has no concept of; we need `sinceSeq` replay on reconnect; multiple open tabs share one session. Bending `useChat` to that model is strictly more work than owning the reducer — and the reducer over `UIMessage` is ~150 lines. 
+
+## Host-service side
+
+`packages/host-service/src/runtime/chat/` grows an `EventBridge` alongside the existing `ChatRuntimeManager`:
+
+```ts
+interface EventLog<TEvent> {
+  append(streamId: string, event: TEvent): Promise<{ seq: number }>
+  readFrom(streamId: string, fromSeq: number, toSeq?: number): Promise<TEvent[]>
+  subscribe(streamId: string, fromSeq?: number): AsyncIterable<TEvent>
+}
+```
+
+Two implementations from day 1:
+- `LocalEventLog` — in-memory ring buffer (N events or M minutes) with a SQLite durable backing store. Default.
+- `S2EventLog` — `s2.dev` client (or whichever durable stream provider we pick). Swapped in via config when running against cloud.
+
+The `EventBridge`:
+
+1. Subscribes to `harness.subscribe()` for the session.
+2. Translates raw harness events into typed `ChatEvent`s, adding `seq` (from the log) and `causationId`.
+3. Appends to the `EventLog`.
+4. Synthesizes `user_message_submitted` **before** calling the harness (fixes the gap called out in `chat-mastra-rebuild-execplan.md`), so user messages and assistant responses share the same ordering guarantee.
+5. Serializes appends per session with an async queue — the bug call-out in the rebuild plan.
+
+tRPC router changes:
+
+- `chat.session.watch` becomes a `.subscription()` over WebSocket (host-service already has `@hono/node-ws`).
+- `chat.session.replayEvents` is a regular query.
+- Existing `chat.session.sendMessage` mutation wraps the `EventBridge` submission path with `CommandReceipts` dedup.
+- `getDisplayState` and `listMessages` are **deleted**. If any internal code still needs a point-in-time snapshot, it reads from the projection (see §Persistence). 
+ +## Message send flow, end-to-end + +```mermaid +sequenceDiagram + autonumber + participant User + participant Store as Client store (Zustand) + participant WS as tRPC over WS + participant Router as chatRouter (host) + participant Receipts as CommandReceipts + participant Bridge as EventBridge + participant Log as EventLog + participant Harness as Mastracode harness + + Note over Store,Log: session.watch subscription already open
client latestSeq = N + + User->>Store: submit "do X" + Store->>Store: optimistic message.appended
(local-only, pending confirm) + Store->>WS: mutation sendMessage
{ commandId, sessionId, text } + WS->>Router: mutation + + Router->>Receipts: lookup(commandId) + alt commandId already seen (retry) + Receipts-->>Router: prior result + Router-->>WS: { ok: true, seqAfter } + WS-->>Store: resolve (no-op, event already applied) + else new command + Router->>Bridge: submit(user message) + Bridge->>Log: append(message.appended{user}) + Log-->>Bridge: seq = N+1 + Bridge->>Log: append(turn.started) + Log-->>Bridge: seq = N+2 + Log-->>WS: fan-out events N+1, N+2 + WS-->>Store: onReceive(N+1) → reconcile optimistic
onReceive(N+2) → status = "running" + Bridge->>Harness: sendTurn() + Router->>Receipts: store(commandId → N+2) + Router-->>WS: { ok: true, seqAfter: N+2 } + WS-->>Store: mutation resolved + end + + loop while turn is running + Harness->>Bridge: runtime event
(token delta · tool call · approval · …) + Bridge->>Bridge: translate → ChatEvent
(stamp causationId) + Bridge->>Log: append(event) + Log-->>Bridge: seq = N+k + Log-->>WS: fan-out + WS-->>Store: onReceive(event) + + alt seq == latestSeq + 1 + Store->>Store: applyEvent
(deltas coalesce to rAF flush) + else seq > latestSeq + 1 (gap) + Store->>Store: push to pendingBuffer + Store->>WS: query replayEvents(fromSeq, toSeq) + WS->>Log: readFrom(fromSeq, toSeq) + Log-->>WS: missing events + WS-->>Store: events + Store->>Store: apply each, drain buffer + else seq <= latestSeq (duplicate / overlap) + Store->>Store: ignore + end + end + + Harness->>Bridge: turn complete + Bridge->>Log: append(message.completed, turn.completed) + Log-->>WS: fan-out + WS-->>Store: apply → status = "idle" + Store-->>User: render final assistant message +``` + +Reading notes: + +- **Steps 1-3.** Optimistic user message lands in the store immediately — no round-trip wait for the first pixel. It gets reconciled (not replaced) when the server-authored `message.appended` arrives at step 12 with the real `seq` and server-authored `messageId`. +- **Steps 4-7.** `commandId` dedup makes the whole mutation idempotent. A flaky network can retry `sendMessage` all day; the server runs the turn exactly once. +- **Steps 13 onwards.** The subscription is the hot path for everything that happens after submission. The mutation's `{ ok: true, seqAfter }` return is a hint, not the data; the UI never blocks on it. +- **Gap branch (step 22-27).** This is the reconnect / out-of-order-delivery path. Client detects `seq > latestSeq + 1`, buffers the new event, fetches the missing slice, applies in order. Same mechanism handles "backgrounded for 10 minutes, WS died" as "one dropped packet." +- **Tool approvals / user questions** follow the same loop — `approval.requested` is just another event on the stream. The user's reply is a separate `answerApproval` mutation (not shown), which produces an `approval.responded` event that unblocks the harness. + +Every box outside of `Harness` is code we write. `Harness` is unchanged Mastracode. + +## Persistence + +The event log is the wire protocol. Storage is separate. 
+
+Short-term: SQLite table `chat_events (stream_id, seq, event_json, occurred_at)` with index on `(stream_id, seq)`. Projection tables (messages, sessions) rebuilt on startup by replaying the log, cached in memory for reads.
+
+This mirrors t3code but lighter — we only need projections where we need fast server-side reads, not for every aggregate. The `messages` projection probably matters; `pendingApprovals` doesn't (keep it in memory).
+
+Long-term (cloud): the `EventLog` interface gets a second implementation backed by a cloud-shared durable store (Postgres on Neon, by default), or alternatively the whole control plane moves to Cloudflare Durable Objects. Both are covered in detail below.
+
+The *client* never sees persistence directly — it always talks to `EventLog` through the tRPC/WS surface.
+
+## Cloud-backed `EventLog`: two paths
+
+Once we want cross-device visibility (same session on laptop + phone simultaneously) or a cloud-hosted agent runtime, `LocalEventLog` on host-service's SQLite is no longer enough — the log has to be reachable from any process that might own a runtime or serve a subscription. We have two credible paths; they are genuinely different and worth choosing between with eyes open.
+
+### Path A — Postgres-backed EventLog (`PostgresEventLog`)
+
+Stay on the existing stack (Neon). Add one table and adapt the `EventLog` implementation:
+
+```sql
+CREATE TABLE chat_events (
+  stream_id TEXT NOT NULL,
+  seq BIGINT NOT NULL,
+  event_json JSONB NOT NULL,
+  occurred_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+  PRIMARY KEY (stream_id, seq)
+);
+CREATE INDEX chat_events_stream_time_idx ON chat_events (stream_id, occurred_at);
+```
+
+- `append()` → `INSERT` + `pg_notify('chat_session_<stream_id>', ...)` for live fan-out.
+- `readFrom()` → `SELECT WHERE stream_id = $1 AND seq BETWEEN $2 AND $3`.
+- `subscribe()` → open a `LISTEN`, stream rows as they arrive. If many subscribers per host is painful, bounce through an in-process broadcaster per session. 
+- Per-session write ordering → an in-process async queue in whichever host-service or cloud worker currently owns the session's writes, plus a lease row in Postgres (`chat_session_ownership`) to prevent two processes racing on the same session. +- Command dedup → same `CommandReceipts` table we already have. + +Who owns the `EventLog`: + +- Host-service continues to own its sessions locally and writes events to both `LocalEventLog` (SQLite) and `PostgresEventLog` (for cross-device visibility). +- When host-service isn't reachable, a cloud worker can claim the lease and take over writes. + +**What this buys:** cross-device read, cross-device replay, cloud-runtime handoff, and no new vendor. Everything builds on Neon, which is already in the stack. + +**What you still build:** +- Lease-based session ownership across processes. +- Per-session write serialisation inside whichever process holds the lease. +- A subscription-fan-out tier (probably host-service's tRPC WS server, or a parallel cloud Node service). +- Idle-cost story: any Node process holding N WebSockets for idle sessions is paying to be idle. + +### Path B — Durable Objects as the whole control plane + +Adopt Cloudflare Workers + Durable Objects. The `EventLog` *and* the subscription transport *and* the session-ownership story all collapse into a single primitive: one DO per session. 
+ +``` +Clients (phone / web / desktop renderer) + │ WebSocket direct to SessionDO + ▼ +┌─────────────────────── Cloudflare ──────────────────────────┐ +│ │ +│ Stateless Worker │ +│ auth · ws-token mint · routing │ +│ │ +│ SessionDO (one per sessionId) │ +│ SQLite storage (events · messages · command_receipts) │ +│ WebSocket hub (browsers + agent runtime) │ +│ Single-threaded — ordering free, no locks │ +│ Hibernates when idle — near-zero cost per idle session │ +│ │ +│ WorkspaceDO (one per workspaceId) │ +│ sidebar index + session-list events │ +│ │ +│ D1 (global) │ +│ users · workspaces · session directory · encrypted creds │ +└─────────────────────────────────────────────────────────────┘ + +Laptop host-service (unchanged filesystem + Mastracode ownership) + ▲ + │ WebSocket connects to SessionDO as a "runtime participant" + │ subscribes for user messages, runs turns, streams events back + +Cloud runtime (Modal/Daytona/Fly container, spun up on handoff) + ▲ + │ Same "runtime participant" role; same protocol +``` + +Shifts vs. Path A: + +- Event log = the DO's per-session SQLite. No Postgres table for events. +- Transport = browsers connect WebSocket directly to the DO via a CF Worker. No host-service-hosted tRPC subscription server for chat. +- Session ownership = platform-guaranteed. Every request for `session/abc-123` goes to the same DO instance. No lease table. +- Per-session write serialisation = free. DOs are single-threaded. +- Fan-out = free. The DO owns the WebSockets and broadcasts natively. +- Idle cost = near-zero. DO hibernation holds WebSockets open while the compute sleeps. +- Host-service role = **downgrades from "source of truth + runtime" to "runtime participant only."** It connects to the SessionDO like any other client, listens for user messages, runs turns, streams events back. It no longer owns the chat event log; SessionDO does. +- Relay role = not used for chat (browsers hit DOs directly). Still used for filesystem tools and terminal. 
+ +**What this buys on top of Path A:** +- Multi-device fan-out as a platform feature (no custom broker). +- Ownership coordination as a platform feature (no lease protocol). +- Hibernation as a platform feature (thousands of idle sessions effectively free). +- Handoff between devices / runtimes is essentially free — both laptop and cloud worker just subscribe to the same SessionDO. +- Multiplayer (multiple humans in one session) is essentially free if ever wanted. + +**What it costs:** +- **New cloud vendor.** Cloudflare enters the stack with its own deployment tooling (Wrangler), observability story, secrets model, and debugging surface. +- **Lock-in.** DOs are Cloudflare-specific. Portable to no other cloud without a rewrite. +- **Chat becomes cloud-dependent.** Host-service's local SQLite is no longer authoritative. Laptop-offline-but-chatting stops working unless we build a local-cache shadow layer. Most teams going DO-native accept this; it's a real regression worth being explicit about. +- **Agent still runs elsewhere.** DO CPU limits prevent Mastracode turns from running inside the DO. Runtime is still host-service (or cloud worker on handoff). + +### Decision framing + +The `EventLog` interface (defined in P0) fits both paths without modification, so nothing in P0-P4 is gated on which cloud path we pick. When we do pick: + +- Path A (Postgres) is the default if we want to stay on our current vendors, are willing to write the ownership + fan-out + idle-cost code ourselves, and don't need the platform-level multi-device ergonomics DOs provide. +- Path B (DOs) is the default if we're open to Cloudflare in the stack and want to shave several piles of custom infrastructure in exchange for that lock-in. + +Both are real choices. See §Phased migration — P5a and P5b — for what shipping each one actually involves. + +## Phased migration + +Phases are sequential. 
P0-P1 can interleave a little; P2 depends on P1; P3 depends on P2; P4 is pure cleanup; P5 is independent from P4 (can ship before or after).
+
+### Blockers to resolve before P0
+
+These are the three questions that will bite us if we skip them:
+
+- [ ] **Decide `seq` ownership: in-memory counter vs. log-assigned.** Recommendation: log-assigned, returned from `append()`. Keeps `EventBridge` single-source and avoids a second counter to keep in sync.
+- [ ] **Resolve provider credential scoping** for renderer ↔ host-service direct WS (the open question from `host-service-chat-architecture.md`). If we don't, P1 stalls.
+- [ ] **Pick the event-type ownership location.** Recommendation: a new `packages/chat-protocol` (schema + TypeScript types only, no runtime) importable by host-service, renderer, mobile, and future cloud worker. Current `packages/trpc` is router-level, not protocol-level.
+
+### P0 — Protocol & EventLog interface (server-side only, no wire changes)
+
+**Goal:** define the contract and the local implementation, verified by a parity test against the current polling output.
+
+- [ ] Create `packages/chat-protocol` with:
+  - [ ] `ChatEvent` envelope type (seq, eventId, sessionId, workspaceId, occurredAt, commandId, causationId).
+  - [ ] `EventPayload` union (using AI SDK `UIMessage` and its part types — no invented message types).
+  - [ ] `WorkspaceEvent` union for the sidebar stream.
+  - [ ] Command input schemas (`sendMessage`, `answerApproval`, `answerQuestion`, `interrupt`) with `commandId: string` required.
+  - [ ] Zod schemas alongside TypeScript types (so tRPC inputs validate).
+- [ ] Define `EventLog` interface in `packages/chat-protocol`:
+  - [ ] `append(streamId, event): Promise<{ seq }>`
+  - [ ] `readFrom(streamId, fromSeq, toSeq?): Promise<ChatEvent[]>`
+  - [ ] `subscribe(streamId, fromSeq?): AsyncIterable<ChatEvent>`
+  - [ ] `snapshot(streamId): Promise<{ state, seq }>` — serves snapshot-on-subscribe.
+- [ ] Implement `LocalEventLog` in `packages/host-service/src/runtime/chat/event-log/`: + - [ ] In-memory ring buffer (default: last 500 events + 15 min, configurable). + - [ ] SQLite durable backing table `chat_events(stream_id, seq, event_json, occurred_at)` via existing Drizzle setup. + - [ ] `subscribe()` implemented as a hot async iterable with pull-based backpressure. +- [ ] Implement `EventBridge` in `packages/host-service/src/runtime/chat/`: + - [ ] Wrap each `RuntimeSession`'s `harness.subscribe()` in a translator that produces typed `ChatEvent`s. + - [ ] Per-session serialized async queue (fixes the ordering bug flagged in `chat-mastra-rebuild-execplan.md`). + - [ ] Synthesize `user_message_submitted` / `message.appended` events *before* calling into the harness so the log ordering is deterministic. + - [ ] Stamp `causationId` where a new event chains from an earlier one. +- [ ] Implement `CommandReceipts` in host-service: + - [ ] SQLite table `command_receipts(command_id PK, session_id, result_seq, created_at)`. + - [ ] Dedup middleware on mutations: if `commandId` exists, return the stored `{ ok: true, seqAfter }` without re-executing. + - [ ] TTL sweep (e.g. 24h). +- [ ] Parity harness: + - [ ] Test that runs a recorded session (user message → tool calls → assistant response) through `EventBridge`, then projects the resulting log through a reducer. + - [ ] Asserts the projected state equals the current `getDisplayState()` + `listMessages()` output for the same inputs. + +**Acceptance:** projection-from-log matches polling-output byte-for-byte on a corpus of ≥5 recorded sessions, including an approval flow and an interrupt. + +### P1 — tRPC surface on host-service (transport, still no client changes) + +**Goal:** expose the event log as typed tRPC subscriptions and replay query. Old polling procedures still work. 
+ +- [ ] Add tRPC subscription procedures in `packages/host-service/src/trpc/router/chat/`: + - [ ] `chat.session.watch({ sessionId, sinceSeq? })` → `Observable`. + - [ ] `chat.workspace.watch({ workspaceId, sinceSeq? })` → `Observable`. +- [ ] Add the replay query: + - [ ] `chat.session.replayEvents({ sessionId, fromSeq, toSeq? })` → `ChatEvent[]`. +- [ ] Wire subscriptions onto `@hono/node-ws` in host-service's app.ts (the terminal route already shows the pattern). +- [ ] Add typed mutations with `commandId` on every input: + - [ ] `chat.session.sendMessage` + - [ ] `chat.session.answerApproval` + - [ ] `chat.session.answerQuestion` + - [ ] `chat.session.interrupt` + - Each wraps the equivalent existing mutation, adds `CommandReceipts` dedup, and returns `{ ok: true, seqAfter }`. +- [ ] Leave `getDisplayState` / `listMessages` intact for now. +- [ ] Write a Node test client (checked in under `packages/host-service/test/`): + - [ ] Connects via WS, subscribes, sends `sendMessage`, asserts expected event sequence arrives. + - [ ] Drops the WS mid-stream, reconnects with `sinceSeq`, asserts no duplicates and no gaps. + - [ ] Simulates a gap (forces server to skip), asserts client-side replay call fills it. + - [ ] Double-submits the same `commandId`, asserts only one effect. + +**Acceptance:** test client green across all four scenarios. No existing chat code has changed. + +### P2 — Client store, reducer, and gap detector (client-side, no UI swap yet) + +**Goal:** everything a UI component would need to render chat off the event stream, shipped as a drop-in hook. + +- [ ] Create `packages/chat/src/client/session-store/` (or similar; can also live in `apps/desktop/src/renderer/stores/chat-session/` if it stays desktop-only — recommend the package to keep web/mobile aligned): + - [ ] Zustand store factory `createSessionStore(sessionId)` with the shape defined in §Client-side design. 
+ - [ ] Pure reducer `applyEvent(state, event)` covering every `EventPayload` variant. Use Immer or structural-clone helpers; keep it obviously pure. + - [ ] `onReceive(event)` with gap detection (`seq vs latestSeq`), pendingBuffer, dedup on `seq <= latestSeq`. + - [ ] Replay trigger: on gap, fires `chat.session.replayEvents`, applies result, drains buffer. +- [ ] Per-frame coalescer: + - [ ] Batch `message.part.delta` events by `(messageId, partIndex, field)` and flush on `requestAnimationFrame`. + - [ ] Non-delta events apply synchronously. +- [ ] Subscription hook `useChatSessionSubscription(sessionId)`: + - [ ] Opens the tRPC subscription, wires events into `onReceive`. + - [ ] Handles WS disconnect → reopen with `sinceSeq = latestSeq + 1`. + - [ ] Surfaces `status: "connecting" | "live" | "replaying" | "error"` for UI affordances. +- [ ] Workspace store (analogous, smaller) for the sidebar. +- [ ] Compatibility shim `useChatDisplay_v2(sessionId)` that exposes the same selector keys today's `useChatDisplay` returns (`messages`, `isRunning`, `currentMessage`, etc.) — this makes P3 a flag flip rather than a rewrite. +- [ ] Unit tests: + - [ ] Apply a recorded event stream to the reducer, snapshot the resulting state. + - [ ] Fuzz: randomized ordering with one missing event, assert store converges to canonical state after replay. + - [ ] Coalescer: 1000 deltas, assert at most 60 flushes per second. + +**Acceptance:** reducer tests green; compatibility shim renders an identical `ChatPane` against a scripted event stream in a Storybook story. + +### P3 — Swap UI consumers + +**Goal:** chat in the app is driven by the event stream; old polling is gated off. + +- [ ] Add a feature flag `chat.useEventStream` (off by default). +- [ ] Swap ChatPane in v2-workspace to use `useChatDisplay_v2` under the flag. +- [ ] Swap any other `useChatDisplay` consumers (grep: `apps/desktop/src/renderer/**/useChatDisplay`). 
+- [ ] Dogfood on the v2-chat-architecture branch for ≥1 week across desktop. +- [ ] QA matrix: + - [ ] Golden path: send message, get response, render tokens. + - [ ] Tool approval flow. + - [ ] Mid-turn interrupt. + - [ ] Reconnect during active turn. + - [ ] Two windows open on the same session (should converge). + - [ ] Rapid consecutive messages (no lost events). +- [ ] Flip the flag default to on once QA is green; keep the flag for two releases as an escape hatch. + +**Acceptance:** no regressions in the chat QA matrix vs. current main for two full releases. + +### P4 — Delete the old surface + +**Goal:** one code path for chat, not two. + +- [ ] Remove the `chat.useEventStream` flag (and any dead branches it gated). +- [ ] Delete `getDisplayState` and `listMessages` tRPC procedures. +- [ ] Delete `packages/chat/src/client/hooks/use-chat-display/`. +- [ ] Delete `withoutActiveTurnAssistantHistory` and related helpers. +- [ ] Delete the legacy surface in `packages/chat/src/server/trpc/service.ts` (the desktop-only tRPC service). Host-service is the only owner — this finishes Phase 3 of `host-service-chat-architecture.md`. +- [ ] Remove `@superset/chat/client/provider` re-exports that nothing else imports. +- [ ] Update `AGENTS.md` / relevant docs to point at the new surface. + +**Acceptance:** zero references to the deleted surface in `apps/` or `packages/` (excluding `temp/`). CI green. + +### P5 — Cloud-backed EventLog (pick A or B; independent of P4) + +P5 is where cross-device visibility and cloud-runtime handoff become possible. Two alternative paths — choose one; they're not additive. See §Cloud-backed `EventLog` above for the comparison. + +#### P5a — Postgres-backed EventLog (stay on current stack) + +**Goal:** ship `PostgresEventLog` behind the existing `EventLog` interface so host-service and cloud processes can read/write the same session log. 
+ +- [ ] Add Drizzle migration for `chat_events(stream_id, seq, event_json, occurred_at)` with `PRIMARY KEY (stream_id, seq)` and a secondary index on `(stream_id, occurred_at)`. +- [ ] Add `chat_session_ownership(stream_id, owner_id, lease_expires_at)` for single-writer ownership. +- [ ] Implement `PostgresEventLog`: + - [ ] `append` → INSERT + `pg_notify('chat_stream_' || stream_id, seq)`. + - [ ] `readFrom` / `snapshot` → range SELECT. + - [ ] `subscribe` → `LISTEN` + in-process fan-out to multiple local subscribers on the same process. + - [ ] Ownership lease acquisition + heartbeat; lease-loss callback so the losing owner stops writing immediately. +- [ ] Dual-write for host-service: `LocalEventLog` stays the primary when host-service is reachable; `PostgresEventLog` mirrors events for cross-device visibility. (Alternative: make Postgres authoritative and drop `LocalEventLog` — simpler, but breaks laptop-offline chat.) +- [ ] Stand up a thin cloud "chat node" (small Fly / Cloudflare Worker service) that serves subscriptions to browsers when host-service is unreachable. Reads from `PostgresEventLog`; forwards writes to whichever process holds the lease. +- [ ] Per-environment config toggle for which `EventLog` implementation each process uses. +- [ ] Multi-device end-to-end test: desktop + web attached to the same session, events converge identically across both; simulated network partition on one device. +- [ ] Operational runbook: lease renewal, stuck-lease recovery, replay-window sizing, disaster recovery. +- [ ] Capacity / cost model: events per session per day × session count × retention window against Neon's pricing. + +**Acceptance:** two clients on the same session converge to identical state after a scripted sequence with a simulated network partition on one side. + +#### P5b — Durable-Objects-native control plane (Cloudflare) + +**Goal:** replace the cloud subscription tier entirely by making each session live in its own Cloudflare Durable Object. 
Host-service becomes a runtime participant connecting to the DO instead of a source of truth. + +- [ ] Stand up a new Cloudflare Workers deployment with Durable Objects bindings and a D1 database. +- [ ] Implement `SessionDO`: + - [ ] SQLite storage (events, messages queue, command_receipts, participants, ws_client_mapping). + - [ ] `fetch` handler for tRPC-style mutations (sendMessage, answerApproval, answerQuestion, interrupt). + - [ ] `webSocketMessage` handler for live client messages (subscribe, prompt, stop, typing, presence). + - [ ] Fan-out helper that iterates connected WSs on every event append. + - [ ] Hibernation enabled; `ws_client_mapping` persists `wsId → participantId` for rehydration. +- [ ] Implement `WorkspaceDO` for per-workspace sidebar events. +- [ ] Implement `D1` schema for users, workspaces, session directory, encrypted credentials. +- [ ] Stateless auth Worker: validate OAuth / JWT, mint WS tokens (short TTL), route to the correct DO. +- [ ] Adapt host-service to connect to `SessionDO` as a runtime participant: + - [ ] Open a long-lived WS with a runtime-auth-token (issued per-session). + - [ ] Subscribe to user-message events; feed them to Mastracode. + - [ ] Write harness events back to the DO via the same WS. + - [ ] Handle "another runtime claimed this session" eviction gracefully. +- [ ] Adapt clients (browser, mobile) to open WebSockets directly to `SessionDO` via the Worker surface, not through host-service / the relay. +- [ ] Decide local-offline story: accept that chat requires cloud connectivity, OR ship a `LocalCacheEventLog` shadow layer that mirrors the DO to host-service's SQLite and serves reads when offline. Recommend starting with "accept cloud dependency" and adding local cache only if users push back. +- [ ] Migrate the chat portion of the relay out — browser traffic for chat stops going through the relay. Filesystem / terminal traffic stays on the relay. 
+- [ ] Multi-device end-to-end test: desktop + web + phone on the same session, convergent state, simulated partition, simulated laptop-off with cloud worker handoff. +- [ ] Operational runbook: DO storage limits, wrangler deploy process, observability + logging, secrets rotation, Cloudflare-specific failure modes. +- [ ] Capacity / cost model: request count × DO-hours × storage × egress. + +**Acceptance:** same as P5a, plus one additional scenario: session active on laptop → laptop disconnects → phone continues seeing the event stream without any latency beyond DO wake-from-hibernation. + +**Note on going from P5a to P5b later:** the `EventLog` interface is implementation-agnostic, so moving from Postgres to DOs *is* possible later, but it's not a drop-in swap — P5b specifically changes who owns the transport (browser connects direct to DO, not through host-service), which is a bigger shift than just swapping the storage. If there's any chance we'll go DO-native, pick it the first time through P5. + +### P6 — Device handoff (turn-boundary + git courier) + +**Goal:** when the primary runtime (laptop host-service) becomes unreachable, subsequent turns can be served by a cloud worker without losing session continuity or uncommitted work. Not mid-turn — turn-boundary only. + +Depends on P5 (either P5a or P5b) — the event log has to be cloud-reachable. + +- [ ] Add event types to the protocol: + - [ ] `runtime_registered { runtimeId, kind: "host" | "cloud", capabilities }` — emitted when a runtime attaches to a session. + - [ ] `runtime_unregistered { runtimeId, reason }` — emitted on graceful disconnect or heartbeat timeout. + - [ ] `handoff_ready { branchName, commitSha, fromRuntimeId }` — emitted when a runtime stashes in-progress work for handoff. + - [ ] `turn_interrupted { turnId, reason }` — emitted when a turn is abandoned mid-flight due to runtime loss. +- [ ] Runtime ownership protocol: + - [ ] Only one `runtime_registered` is active per session at a time. 
+  - [ ] Ownership renewal via heartbeat events every N seconds.
+  - [ ] New runtime can claim after heartbeat timeout + grace window.
+- [ ] Laptop host-service handoff trigger:
+  - [ ] On graceful shutdown (lid close via macOS power notification, quit, explicit "handoff" command): stage + commit dirty workspace files to `superset/handoff/<sessionId>`, push, emit `handoff_ready`.
+  - [ ] On ungraceful loss (network drop + grace window expires): cloud coordinator declares the runtime dead, emits `turn_interrupted` if a turn was mid-flight.
+- [ ] On-demand cloud runtime spawn:
+  - [ ] Coordinator service watches sessions with pending user messages and no active runtime.
+  - [ ] Spawns a Modal/Daytona/Fly-container sandbox, clones the repo, checks out the handoff branch (or `main` if no handoff branch), runs `.superset/setup.sh` if present.
+  - [ ] New cloud runtime registers with the session via `runtime_registered`, picks up the pending message, runs the turn, streams events back.
+- [ ] Return-to-laptop flow:
+  - [ ] Host-service reconnects, sees later events in the log authored by a cloud runtime.
+  - [ ] Pulls any ephemeral branches the cloud runtime pushed (e.g. `superset/cloud/<sessionId>/<turnId>`).
+  - [ ] UI surface: "your session ran in the cloud while you were away; here's the diff — merge or discard?"
+- [ ] Speculative runtime warming: when phone sends `typing` and no local runtime is reachable, emit a warming signal so the cloud coordinator starts spawning a sandbox speculatively. Hides cold-start latency.
+- [ ] Honest limitations to document:
+  - [ ] Mid-turn handoff not supported — interrupted turns are lost, user re-prompts.
+  - [ ] Untracked / gitignored files are lost on handoff (only committed state travels).
+  - [ ] Long-running processes (dev server, watch modes) started inside a turn don't survive the handoff.
+- [ ] Tests: laptop-closes-mid-session convergence; return-to-laptop merge UX; simultaneous runtime-claim race; cloud runtime timeout + re-spawn.
+ +**Acceptance:** user can close laptop mid-chat, continue from phone, see cloud-run turns land in real time, return to laptop and pull cloud-authored commits without conflict in the golden path. + +### P7 (speculative) — Event-sourced agent for mid-turn handoff + +Deliberately not in scope for the first pass. Covered in conversation because it keeps coming up. + +Seamless mid-turn handoff requires the agent's in-flight state (LLM stream position, partially-applied tool results) to be reconstructible from the event log, which in turn requires the agent to emit *intent* events before acting and to be designed around replay-safe tools. That's a ~quarter-scale of work on top of P6 and produces a quality regression on re-prompted LLM streams. Worth revisiting only if long autonomous runs + device handoff become first-class product requirements. + +### Summary timeline + +Rough sizing (for a single engineer, not including meetings / reviews): + +| Phase | Est. | Can parallelize? | Ships behind flag? | +|-------|------|------------------|--------------------| +| Blockers | 1-2 days | — | n/a | +| P0 | 1-2 weeks | No | n/a (no wire change) | +| P1 | 1 week | Partial with P0 end | n/a (additive) | +| P2 | 1-2 weeks | After P1 | n/a | +| P3 | 1 week + bake time | After P2 | Yes | +| P4 | 2-3 days | After P3 bake | n/a | +| P5a (Postgres) | 2-3 weeks | Any time after P2 | Yes (config) | +| P5b (Durable Objects) | 4-6 weeks | Any time after P2; mutually exclusive with P5a | Yes (config) | +| P6 (handoff) | 2-3 weeks | After P5 | Yes | +| P7 (event-sourced agent) | quarter-scale | Speculative / not on critical path | n/a | + +P0-P4 is the critical path for killing the race condition and unblocking same-machine multi-window. P5 is where cross-device + cloud-runtime becomes possible; P5a and P5b are alternatives, not additive. P6 delivers the close-laptop-continue-on-phone handoff UX on top of whichever P5 path was chosen. 
P7 is explicitly not on the roadmap — listed so we don't accidentally rediscover why it's hard. + +## Open questions + +- **P5 path: Postgres or Durable Objects?** The single biggest deferred decision. Postgres keeps us on our current vendors and requires more custom infra (ownership leases, fan-out tier, idle-cost story). DOs replace several layers with platform primitives but bring Cloudflare into the stack and make chat cloud-dependent. Worth resolving before P5 starts, but not before — P0-P4 is the same either way. +- **Local-offline chat in the DO path.** If we go P5b, do we accept that chat requires internet, or build a `LocalCacheEventLog` shadow layer that mirrors the DO? Recommend starting without it and adding only if users surface friction. +- **Ownership of `commandId` dedup window.** Keep `CommandReceipts` in our own durable store regardless of P5 path — cleaner semantics, survives backend swaps, less magic. +- **Subscription granularity for workspace stream.** One subscription per workspace or one per user-across-workspaces? Recommend per-workspace — matches the current sidebar scope and keeps events small. +- **Backpressure.** If a slow client can't keep up with token deltas, do we drop, coalesce, or disconnect? Recommend server-side (or DO-side) ring-per-subscriber with coalesce-latest-wins on delta events, hard-drop on sustained overflow with an `error` event sent down the wire. +- **Mobile background reconnect.** When iOS backgrounds the app for 20 minutes, the WS dies. On resume: reopen + `sinceSeq` + replay. Should be free with this design, but worth an explicit test plan. +- **Provider credential scoping for renderer-direct connection.** The open question from `host-service-chat-architecture.md` still applies — if the renderer talks to host-service WS directly (P5a) or a DO directly (P5b), provider creds need to be scoped cleanly. Resolve before P1. +- **Handoff workspace state.** P6 uses git as the filesystem courier. 
Fine for committed state; what's our story for untracked files the agent wrote but hadn't committed? Options: auto-stash to a handoff branch, snapshot workspace to object storage, or accept the loss. Resolve before P6. + +## Summary + +tRPC subscriptions over the WebSocket we already have, wrapping an `EventLog` abstraction. `LocalEventLog` on SQLite for the local case. Two cloud options: **`PostgresEventLog` on Neon (P5a)** to stay on current vendors, or **Durable Objects (P5b)** to fold event log + subscription transport + session ownership + hibernation into one Cloudflare primitive at the cost of vendor lock-in. Per-session event streams with monotonic `seq`, `commandId`-keyed idempotent mutations, `replayEvents` on gap, and a Zustand reducer over `UIMessage` that applies events in order. Two subscription scopes — workspace for the sidebar, session for the open chat — each writing to a disjoint region of client state. On top of P5, a **P6 handoff flow** (turn-boundary, git as filesystem courier, on-demand cloud runtime) covers close-laptop-continue-on-phone. Mid-turn handoff is explicitly out of scope. Everything above is justification.