Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,6 @@ storybook-static

# Build artifact (esbuild/sidecar output accidentally placed in project root)
main.js

# Local iteration tooling (live-agent probes, ad-hoc harness scripts)
.scratch/
25 changes: 21 additions & 4 deletions apps/agent-server/agents/claude/claude-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import {
isSessionActive,
type SessionState,
} from "./claude-session";
import { attachQuery, detachQuery } from "../../app-registrar";

// Internal-only type for the private workspace init helper
interface WorkspaceInitOptions {
Expand Down Expand Up @@ -444,6 +445,10 @@ export class ClaudeAgentHandler implements AgentHandler {
// Mutable context accumulated during the streaming loop.
const ctx = createStreamContext();

// Hoisted so the finally block can detach from the app-registrar even
// when an early throw (e.g. invalid cwd) happens before SDK construction.
let queryResult: ReturnType<typeof claudeSDK> | undefined;

try {
const invalidWorkspacePathError = getInvalidWorkspacePathError(options.cwd);
if (invalidWorkspacePathError) {
Expand Down Expand Up @@ -485,12 +490,18 @@ export class ClaudeAgentHandler implements AgentHandler {
`[RESUME-DEBUG][${generatorId}] SDK options: resume=${sdkOptions.resume ?? "none"} cwd=${sdkOptions.cwd} model=${sdkOptions.model} permissionMode=${sdkOptions.permissionMode}`
);
const tSdkSpawn = Date.now();
const queryResult = claudeSDK({ prompt: promptInput, options: sdkOptions });
queryResult = claudeSDK({ prompt: promptInput, options: sdkOptions });
console.log(
`[TIMING][${generatorId}] claudeSDK() constructor returned in ${Date.now() - tSdkSpawn}ms`
);

claudeQueries.set(sessionId, queryResult);
// Pin the session's initial SDK servers (the `deus` tools) so any
// subsequent AAP-MCP broadcast preserves them. Without this, the SDK's
// setMcpServers would disconnect the deus transport and hang any
// tool call that's mid-flight on it (e.g. `launch_app` itself, which
// triggers the very broadcast that kills its own transport).
attachQuery(queryResult, sdkOptions.mcpServers ?? {});
session.generator = queryResult[Symbol.asyncIterator]();

// Per-message options (constant for the lifetime of this generator).
Expand Down Expand Up @@ -666,9 +677,15 @@ export class ClaudeAgentHandler implements AgentHandler {
});
}
} finally {
// Only clean up if this generator still owns the session.
// A rapid re-query can replace the session before this finally runs;
// blindly deleting would wipe the new session's state.
// Detach unconditionally: the registrar's `protectedByQuery` map is keyed
// by Query identity, so even if a rapid re-query has already replaced
// this session, our specific Query handle still needs its entry cleared
// (otherwise it leaks for the rest of the process lifetime).
if (queryResult) detachQuery(queryResult);

// The session/query maps are keyed by sessionId, so we only delete them
// if THIS generator still owns the slot — a re-query may have already
// overwritten it with a fresh Query/SessionState we mustn't wipe.
if (claudeSessions.owns(sessionId, session)) {
claudeQueries.delete(sessionId);
Comment thread
coderabbitai[bot] marked this conversation as resolved.
claudeSessions.delete(sessionId);
Expand Down
161 changes: 161 additions & 0 deletions apps/agent-server/agents/deus-tools/apps.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
// agent-server/agents/deus-tools/apps.ts
//
// AAP lifecycle tools surfaced in the Deus MCP server. Thin wrappers: each
// tool validates its args, calls `EventBroadcaster.requestXxx(...)` to hit
// the backend's apps.service (where real state + process management lives),
// and formats the result for the agent.
//
// The backend is the single writer for AAP state. These tools never touch
// the registrar directly — when a launch succeeds, the backend's mcp-bridge
// fires `aap/register-mcp` back to the agent-server, which the registrar
// handles separately. Two clean halves, one choke point per direction.

import { tool } from "@anthropic-ai/claude-agent-sdk";
import type { SdkMcpToolDefinition } from "@anthropic-ai/claude-agent-sdk";
import { z } from "zod";
import { getErrorMessage } from "@shared/lib/errors";
import { EventBroadcaster } from "../../event-broadcaster";

// ----------------------------------------------------------------------------
// Response helpers
// ----------------------------------------------------------------------------

/**
 * Build a tool result whose `content` holds exactly one plain-text block.
 *
 * @param text - The message to hand back to the agent verbatim.
 * @returns `{ content: [{ type: "text", text }] }`.
 */
function textResult(text: string) {
  const block = { type: "text" as const, text };
  return { content: [block] };
}

/**
 * Decorate a tool handler so failures are reported as text, not thrown.
 * Mirrors the browser/simulator tools: an exception that escapes a tool
 * handler would break the ongoing agent turn, so anything `fn` throws or
 * rejects with is turned into an "AAP error: ..." text result.
 *
 * @param fn - The handler to guard.
 * @returns An async handler with the same argument type that never rejects
 *   because of `fn`.
 */
function withErrorCatch<T>(
  fn: (args: T) => Promise<{ content: Array<{ type: string; [k: string]: unknown }> }>
) {
  return async (args: T) => {
    try {
      // Awaiting inside the try covers both rejections and synchronous throws.
      const outcome = await fn(args);
      return outcome;
    } catch (failure) {
      const reason = getErrorMessage(failure);
      return textResult(`AAP error: ${reason}`);
    }
  };
}

// ----------------------------------------------------------------------------
// Factory
// ----------------------------------------------------------------------------

/**
 * Build the AAP lifecycle tools (`list_apps`, `launch_app`, `stop_app`,
 * `read_app_skill`) bound to a single agent session.
 *
 * Each handler logs its invocation, issues one `EventBroadcaster.requestXxx`
 * call, and formats the response as a text result. Every handler is wrapped
 * in `withErrorCatch`, so a failing request is reported to the agent as
 * "AAP error: ..." text instead of an exception.
 *
 * @param sessionId - Agent session id. Sent with the list and launch
 *   requests and included in every tool's log line.
 * @returns The four tool definitions, in the order listed above.
 */
export function createAppsTools(sessionId: string): SdkMcpToolDefinition<any>[] {
  return [
    // -- ListApps -------------------------------------------------------------
    tool(
      "list_apps",
      `List installed Deus apps and which are currently running in YOUR workspace.

Returns a JSON object:
{ apps: InstalledApp[], runningAppIds: string[] }
Each app has { id, name, description, version, icon?, bootstrap? }.
Use the app's \`id\` (e.g. "deus.mobile-use") as the argument to launch_app.

Running apps are auto-scoped to the agent's current session/workspace;
you don't need (and can't pass) a workspaceId.`,
      {},
      withErrorCatch(async () => {
        console.log(`[deusMCPServer] list_apps invoked for session ${sessionId}`);
        const response = await EventBroadcaster.requestListApps({ sessionId });
        // Pretty-printed JSON so the agent can read the ids directly.
        return textResult(JSON.stringify(response, null, 2));
      })
    ),

    // -- LaunchApp ------------------------------------------------------------
    tool(
      "launch_app",
      `Launch an installed Deus app in YOUR current workspace. The backend
spawns the app's subprocess, waits for its ready probe, and (on success)
registers its MCP tools into THIS agent session. New tools appear as
\`mcp__{app_server_name}__*\` (e.g. \`mcp__deus_mobile_use__snapshot\` for
the mobile-use app) within a few seconds — they're immediately callable.

One instance per (appId, workspace): a duplicate launch returns the
existing runningAppId. The app's manifest \`bootstrap\` — a short help
string — is returned so you know how to use its tools.

Workspace is inferred from your session — do NOT pass a workspaceId.`,
      {
        appId: z.string().describe('App id (e.g. "deus.mobile-use"). Get from list_apps.'),
      },
      withErrorCatch(async (args: { appId: string }) => {
        console.log(
          `[deusMCPServer] launch_app invoked for session ${sessionId} appId=${args.appId}`
        );
        const response = await EventBroadcaster.requestLaunchApp({
          appId: args.appId,
          sessionId,
        });

        const lines = [
          `Launched ${args.appId}`,
          ` runningAppId: ${response.runningAppId}`,
          ` url: ${response.url}`,
        ];
        // The bootstrap hint is optional; only add the section when present.
        if (response.bootstrap) {
          lines.push("", `App bootstrap hint:`, response.bootstrap);
        }
        lines.push(
          "",
          `The app's MCP tools (mcp__{server}__*) will appear in your tool list shortly.`
        );
        return textResult(lines.join("\n"));
      })
    ),

    // -- StopApp --------------------------------------------------------------
    tool(
      "stop_app",
      `Stop a running Deus app by its runningAppId. The backend sends SIGTERM,
waits for the stop timeout, then SIGKILLs if needed. The app's MCP tools
are automatically removed from your tool list.`,
      {
        runningAppId: z.string().describe("The runningAppId returned by launch_app."),
      },
      withErrorCatch(async (args: { runningAppId: string }) => {
        console.log(
          `[deusMCPServer] stop_app invoked for session ${sessionId} runningAppId=${args.runningAppId}`
        );
        const response = await EventBroadcaster.requestStopApp({
          runningAppId: args.runningAppId,
        });
        return textResult(
          response.success
            ? `Stopped runningAppId ${args.runningAppId}.`
            : `Failed to stop runningAppId ${args.runningAppId}.`
        );
      })
    ),

    // -- ReadAppSkill ---------------------------------------------------------
    tool(
      "read_app_skill",
      `Read the detailed usage docs ("skill") an installed Deus app ships with.
The \`launch_app\` tool deliberately keeps its response lean — call this
only when you need deeper guidance on how to drive the app's MCP tools
(typical triggers: first use of an app in a session, or unfamiliar tool
names showing up after a launch). Content is markdown; may include
command examples, workflow patterns, and JSON shape references.

Returns "No skills declared for <appId>." if the app declares no skills.`,
      {
        appId: z.string().describe('App id (e.g. "deus.mobile-use"). Get from list_apps.'),
      },
      withErrorCatch(async (args: { appId: string }) => {
        console.log(
          `[deusMCPServer] read_app_skill invoked for session ${sessionId} appId=${args.appId}`
        );
        const response = await EventBroadcaster.requestReadAppSkill({ appId: args.appId });
        // Empty content becomes an explicit notice so the agent never gets a
        // blank tool result; the tool description above states this contract.
        return textResult(
          response.content.length > 0 ? response.content : `No skills declared for ${args.appId}.`
        );
      })
    ),
  ];
}
9 changes: 6 additions & 3 deletions apps/agent-server/agents/deus-tools/index.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
// agent-server/agents/deus-tools/index.ts
// Composes workspace + browser + simulator + recording tools into the Deus MCP server.
// The RecordingBridge snoops on browser tool executions to automatically emit
// recording events — the agent never needs to call recording_event manually.
// Composes workspace + browser + simulator + recording + apps tools into the
// Deus MCP server. The RecordingBridge snoops on browser tool executions to
// automatically emit recording events — the agent never needs to call
// recording_event manually.

import { createSdkMcpServer } from "@anthropic-ai/claude-agent-sdk";
import { createWorkspaceTools } from "./workspace";
import { createBrowserTools } from "./browser";
import { createAppsTools } from "./apps";

import { createSimulatorTools } from "./simulator";
import { createRecordingTools, getSessionManager } from "./recording";
Expand Down Expand Up @@ -75,6 +77,7 @@ export function createDeusMCPServer(sessionId: string) {
bridge.onBrowserAction(action);
}),
...createSimulatorTools(sessionId),
...createAppsTools(sessionId),
...wrappedRecordingTools,
],
});
Expand Down
8 changes: 8 additions & 0 deletions apps/agent-server/agents/session-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,14 @@ export class SessionStore<T> {
this.sessions.clear();
}

/**
 * Expose every value currently held in the store as an iterator. The AAP
 * app-registrar uses this to broadcast `setMcpServers` updates to each
 * live Claude Query.
 *
 * @returns The iterator produced by `this.sessions.values()`.
 */
values(): IterableIterator<T> {
  const stored = this.sessions.values();
  return stored;
}

/**
* Check if the session reference matches (for ownership guards).
*
Expand Down
Loading
Loading