zvadaadam · zvadaadam · Apr 20, 2026 · Apr 19, 2026 · Apr 19, 2026 · Apr 19, 2026
diff --git a/.gitignore b/.gitignore
@@ -101,3 +101,6 @@ storybook-static
 
 # Build artifact (esbuild/sidecar output accidentally placed in project root)
 main.js
+
+# Local iteration tooling (live-agent probes, ad-hoc harness scripts)
+.scratch/
diff --git a/apps/agent-server/agents/claude/claude-handler.ts b/apps/agent-server/agents/claude/claude-handler.ts
@@ -46,6 +46,7 @@ import {
   isSessionActive,
   type SessionState,
 } from "./claude-session";
+import { attachQuery, detachQuery } from "../../app-registrar";
 
 // Internal-only type for the private workspace init helper
 interface WorkspaceInitOptions {
@@ -444,6 +445,10 @@ export class ClaudeAgentHandler implements AgentHandler {
     // Mutable context accumulated during the streaming loop.
     const ctx = createStreamContext();
 
+    // Hoisted so the finally block can detach from the app-registrar even
+    // when an early throw (e.g. invalid cwd) happens before SDK construction.
+    let queryResult: ReturnType<typeof claudeSDK> | undefined;
+
     try {
       const invalidWorkspacePathError = getInvalidWorkspacePathError(options.cwd);
       if (invalidWorkspacePathError) {
@@ -485,12 +490,18 @@ export class ClaudeAgentHandler implements AgentHandler {
         `[RESUME-DEBUG][${generatorId}] SDK options: resume=${sdkOptions.resume ?? "none"} cwd=${sdkOptions.cwd} model=${sdkOptions.model} permissionMode=${sdkOptions.permissionMode}`
       );
       const tSdkSpawn = Date.now();
-      const queryResult = claudeSDK({ prompt: promptInput, options: sdkOptions });
+      queryResult = claudeSDK({ prompt: promptInput, options: sdkOptions });
       console.log(
         `[TIMING][${generatorId}] claudeSDK() constructor returned in ${Date.now() - tSdkSpawn}ms`
       );
 
       claudeQueries.set(sessionId, queryResult);
+      // Pin the session's initial SDK servers (the `deus` tools) so any
+      // subsequent AAP-MCP broadcast preserves them. Without this, the SDK's
+      // setMcpServers would disconnect the deus transport and hang any
+      // tool call that's mid-flight on it (e.g. `launch_app` itself, which
+      // triggers the very broadcast that kills its own transport).
+      attachQuery(queryResult, sdkOptions.mcpServers ?? {});
       session.generator = queryResult[Symbol.asyncIterator]();
 
       // Per-message options (constant for the lifetime of this generator).
@@ -666,9 +677,15 @@ export class ClaudeAgentHandler implements AgentHandler {
         });
       }
     } finally {
-      // Only clean up if this generator still owns the session.
-      // A rapid re-query can replace the session before this finally runs;
-      // blindly deleting would wipe the new session's state.
+      // Detach unconditionally: the registrar's `protectedByQuery` map is keyed
+      // by Query identity, so even if a rapid re-query has already replaced
+      // this session, our specific Query handle still needs its entry cleared
+      // (otherwise it leaks for the rest of the process lifetime).
+      if (queryResult) detachQuery(queryResult);
+
+      // The session/query maps are keyed by sessionId, so we only delete them
+      // if THIS generator still owns the slot — a re-query may have already
+      // overwritten it with a fresh Query/SessionState we mustn't wipe.
       if (claudeSessions.owns(sessionId, session)) {
         claudeQueries.delete(sessionId);
         claudeSessions.delete(sessionId);

diff --git a/apps/agent-server/agents/deus-tools/apps.ts b/apps/agent-server/agents/deus-tools/apps.ts
@@ -0,0 +1,161 @@
+// agent-server/agents/deus-tools/apps.ts
+//
+// AAP lifecycle tools surfaced in the Deus MCP server. Thin wrappers: each
+// tool validates its args, calls `EventBroadcaster.requestXxx(...)` to hit
+// the backend's apps.service (where real state + process management lives),
+// and formats the result for the agent.
+//
+// The backend is the single writer for AAP state. These tools never touch
+// the registrar directly — when a launch succeeds, the backend's mcp-bridge
+// fires `aap/register-mcp` back to the agent-server, which the registrar
+// handles separately. Two clean halves, one choke point per direction.
+
+import { tool } from "@anthropic-ai/claude-agent-sdk";
+import type { SdkMcpToolDefinition } from "@anthropic-ai/claude-agent-sdk";
+import { z } from "zod";
+import { getErrorMessage } from "@shared/lib/errors";
+import { EventBroadcaster } from "../../event-broadcaster";
+
+// ----------------------------------------------------------------------------
+// Response helpers
+// ----------------------------------------------------------------------------
+
+/** Builds a tool result whose content is a single text item holding `text`. */
+function textResult(text: string) {
+  const textItem = { type: "text" as const, text };
+  return { content: [textItem] };
+}
+
+/**
+ * Decorates a tool handler so that failures surface as text rather than as
+ * exceptions. Anything thrown or rejected by `fn` is converted into an
+ * "AAP error: …" text result — the same pattern as the browser/simulator
+ * tools — because an exception escaping a tool handler would break the
+ * ongoing agent turn.
+ */
+function withErrorCatch<T>(
+  fn: (args: T) => Promise<{ content: Array<{ type: string; [k: string]: unknown }> }>
+) {
+  const guarded = async (args: T) => {
+    try {
+      const outcome = await fn(args);
+      return outcome;
+    } catch (failure) {
+      const reason = getErrorMessage(failure);
+      return textResult(`AAP error: ${reason}`);
+    }
+  };
+  return guarded;
+}
+
+// ----------------------------------------------------------------------------
+// Factory
+// ----------------------------------------------------------------------------
+
+/**
+ * Builds the AAP lifecycle tools (`list_apps`, `launch_app`, `stop_app`,
+ * `read_app_skill`) for one agent session.
+ *
+ * Every handler is wrapped in `withErrorCatch`, so a failed backend request
+ * is reported to the agent as "AAP error: …" text instead of throwing.
+ *
+ * @param sessionId - Agent session id. Logged by every tool and forwarded to
+ *   the backend by `list_apps` and `launch_app`; `stop_app` and
+ *   `read_app_skill` send only their own argument.
+ * @returns The tool definitions, in the order listed above.
+ */
+export function createAppsTools(sessionId: string): SdkMcpToolDefinition<any>[] {
+  return [
+    // -- ListApps -------------------------------------------------------------
+    tool(
+      "list_apps",
+      `List installed Deus apps and which are currently running in YOUR workspace.
+
+Returns a JSON object:
+  { apps: InstalledApp[], runningAppIds: string[] }
+Each app has { id, name, description, version, icon?, bootstrap? }.
+Use the app's \`id\` (e.g. "deus.mobile-use") as the argument to launch_app.
+
+Running apps are auto-scoped to the agent's current session/workspace;
+you don't need (and can't pass) a workspaceId.`,
+      {},
+      withErrorCatch(async () => {
+        console.log(`[deusMCPServer] list_apps invoked for session ${sessionId}`);
+        const response = await EventBroadcaster.requestListApps({ sessionId });
+        // The backend response is handed to the agent verbatim as pretty-printed JSON.
+        return textResult(JSON.stringify(response, null, 2));
+      })
+    ),
+
+    // -- LaunchApp ------------------------------------------------------------
+    tool(
+      "launch_app",
+      `Launch an installed Deus app in YOUR current workspace. The backend
+spawns the app's subprocess, waits for its ready probe, and (on success)
+registers its MCP tools into THIS agent session. New tools appear as
+\`mcp__{app_server_name}__*\` (e.g. \`mcp__deus_mobile_use__snapshot\` for
+the mobile-use app) within a few seconds — they're immediately callable.
+
+One instance per (appId, workspace): a duplicate launch returns the
+existing runningAppId. The app's manifest \`bootstrap\` — a short help
+string — is returned so you know how to use its tools.
+
+Workspace is inferred from your session — do NOT pass a workspaceId.`,
+      {
+        appId: z.string().describe('App id (e.g. "deus.mobile-use"). Get from list_apps.'),
+      },
+      withErrorCatch(async (args: { appId: string }) => {
+        console.log(
+          `[deusMCPServer] launch_app invoked for session ${sessionId} appId=${args.appId}`
+        );
+        const response = await EventBroadcaster.requestLaunchApp({
+          appId: args.appId,
+          sessionId,
+        });
+
+        const lines = [
+          `Launched ${args.appId}`,
+          `  runningAppId: ${response.runningAppId}`,
+          `  url: ${response.url}`,
+        ];
+        // The bootstrap hint is optional; include it only when the backend sent one.
+        if (response.bootstrap) {
+          lines.push("", `App bootstrap hint:`, response.bootstrap);
+        }
+        lines.push(
+          "",
+          `The app's MCP tools (mcp__{server}__*) will appear in your tool list shortly.`
+        );
+        return textResult(lines.join("\n"));
+      })
+    ),
+
+    // -- StopApp --------------------------------------------------------------
+    tool(
+      "stop_app",
+      `Stop a running Deus app by its runningAppId. The backend sends SIGTERM,
+waits for the stop timeout, then SIGKILLs if needed. The app's MCP tools
+are automatically removed from your tool list.`,
+      {
+        runningAppId: z.string().describe("The runningAppId returned by launch_app."),
+      },
+      withErrorCatch(async (args: { runningAppId: string }) => {
+        console.log(
+          `[deusMCPServer] stop_app invoked for session ${sessionId} runningAppId=${args.runningAppId}`
+        );
+        const response = await EventBroadcaster.requestStopApp({
+          runningAppId: args.runningAppId,
+        });
+        return textResult(
+          response.success
+            ? `Stopped runningAppId ${args.runningAppId}.`
+            : `Failed to stop runningAppId ${args.runningAppId}.`
+        );
+      })
+    ),
+
+    // -- ReadAppSkill ---------------------------------------------------------
+    tool(
+      "read_app_skill",
+      `Read the detailed usage docs ("skill") an installed Deus app ships with.
+The \`launch_app\` tool deliberately keeps its response lean — call this
+only when you need deeper guidance on how to drive the app's MCP tools
+(typical triggers: first use of an app in a session, or unfamiliar tool
+names showing up after a launch). Content is markdown; may include
+command examples, workflow patterns, and JSON shape references.
+
+Returns the notice "No skills declared for <appId>." if the app declares no skills.`,
+      {
+        appId: z.string().describe('App id (e.g. "deus.mobile-use"). Get from list_apps.'),
+      },
+      withErrorCatch(async (args: { appId: string }) => {
+        console.log(
+          `[deusMCPServer] read_app_skill invoked for session ${sessionId} appId=${args.appId}`
+        );
+        const response = await EventBroadcaster.requestReadAppSkill({ appId: args.appId });
+        // Empty content is replaced with an explicit notice, matching the
+        // wording promised in the tool description.
+        return textResult(
+          response.content.length > 0 ? response.content : `No skills declared for ${args.appId}.`
+        );
+      })
+    ),
+  ];
+}
diff --git a/apps/agent-server/agents/deus-tools/index.ts b/apps/agent-server/agents/deus-tools/index.ts
@@ -1,11 +1,13 @@
 // agent-server/agents/deus-tools/index.ts
-// Composes workspace + browser + simulator + recording tools into the Deus MCP server.
-// The RecordingBridge snoops on browser tool executions to automatically emit
-// recording events — the agent never needs to call recording_event manually.
+// Composes workspace + browser + simulator + recording + apps tools into the
+// Deus MCP server. The RecordingBridge snoops on browser tool executions to
+// automatically emit recording events — the agent never needs to call
+// recording_event manually.
 
 import { createSdkMcpServer } from "@anthropic-ai/claude-agent-sdk";
 import { createWorkspaceTools } from "./workspace";
 import { createBrowserTools } from "./browser";
+import { createAppsTools } from "./apps";
 
 import { createSimulatorTools } from "./simulator";
 import { createRecordingTools, getSessionManager } from "./recording";
@@ -75,6 +77,7 @@ export function createDeusMCPServer(sessionId: string) {
         bridge.onBrowserAction(action);
       }),
       ...createSimulatorTools(sessionId),
+      ...createAppsTools(sessionId),
       ...wrappedRecordingTools,
     ],
   });

diff --git a/apps/agent-server/agents/session-store.ts b/apps/agent-server/agents/session-store.ts
@@ -42,6 +42,14 @@ export class SessionStore<T> {
     this.sessions.clear();
   }
 
+  /**
+   * Exposes an iterator over every stored value. The AAP app-registrar uses
+   * it to broadcast `setMcpServers` updates to every live Claude Query.
+   */
+  values(): IterableIterator<T> {
+    const storedValues = this.sessions.values();
+    return storedValues;
+  }
+
+
   /**
    * Check if the session reference matches (for ownership guards).
    *